From 3d619cbe6996c9492d9aa54954d39358ce61b56f Mon Sep 17 00:00:00 2001 From: Ian Hellen Date: Tue, 23 Mar 2021 19:50:35 -0700 Subject: [PATCH] Adding ip_summary notebooklet Multiple updates and fixes to account_summary Multiple fixes to network_flow_summary Updating version to 0.2.0 Added some utility functions to common.py and notebooklet.py - check_valid_result_data - check_table_exists - get_methods/list_methods (lists only methods defined on subclasses, not Notebooklet class) Split NotebooketResult into separate module notebooklet_result.py Added ability to invoke notebooklet functions from results class Fixes/regularization to host.host.py and iptools.py. Added VPS lookup Added alert.py alert browser Added several test data sets such as azure_activity_df, az_net_df (interface), vmcomputer_df, host_hb_df Added mock classes for TILookup and GeoIP for testing Switched several test modules to native pytest format. --- docs/source/conf.py | 8 +- docs/source/readme.md | 537 ++++++++++++++ msticnb/__init__.py | 4 +- msticnb/_version.py | 2 +- msticnb/common.py | 39 +- msticnb/data_providers.py | 124 ++-- msticnb/nb/azsent/account/account_summary.py | 145 ++-- .../nb/azsent/account/account_summary.yaml | 5 +- msticnb/nb/azsent/network/ip_summary.py | 680 ++++++++++++++++++ msticnb/nb/azsent/network/ip_summary.yaml | 66 ++ .../nb/azsent/network/network_flow_summary.py | 86 ++- .../azsent/network/network_flow_summary.yaml | 2 +- msticnb/nb_pivot.py | 6 +- msticnb/nblib/azsent/alert.py | 26 + msticnb/nblib/azsent/host.py | 119 ++- msticnb/nblib/entity_tools.py | 47 ++ msticnb/nblib/iptools.py | 79 +- msticnb/notebooklet.py | 214 ++---- msticnb/notebooklet_result.py | 173 +++++ .../nb/azsent/account/test_account_summary.py | 11 +- tests/nb/azsent/alert/test_ti_enrich.py | 13 +- tests/nb/azsent/host/test_host_summary.py | 43 +- tests/nb/azsent/host/test_hostlogonsummary.py | 20 +- tests/nb/azsent/host/test_win_host_events.py | 55 +- 
tests/nb/azsent/network/test_ip_summary.py | 126 ++++ .../network/test_network_flow_summary.py | 70 +- tests/nb/template/test_nb_template.py | 43 +- tests/test_common.py | 229 +++--- tests/test_dataprovider.py | 128 ++-- tests/test_metadata.py | 74 +- tests/test_nb_pivot.py | 29 +- tests/test_notebooklet.py | 260 +++---- tests/test_read_modules.py | 86 ++- tests/testdata/az_net_df.pkl | Bin 0 -> 10735 bytes tests/testdata/az_net_if_df.pkl | Bin 0 -> 18625 bytes tests/testdata/azure_activity_df.pkl | Bin 0 -> 89507 bytes tests/testdata/host_hb_df.pkl | Bin 0 -> 2767 bytes tests/testdata/local_data.yaml | 76 +- tests/testdata/vmcomputer_df.pkl | Bin 0 -> 50819 bytes tests/unit_test_lib.py | 138 ++++ 40 files changed, 2916 insertions(+), 847 deletions(-) create mode 100644 docs/source/readme.md create mode 100644 msticnb/nb/azsent/network/ip_summary.py create mode 100644 msticnb/nb/azsent/network/ip_summary.yaml create mode 100644 msticnb/nblib/azsent/alert.py create mode 100644 msticnb/nblib/entity_tools.py create mode 100644 msticnb/notebooklet_result.py create mode 100644 tests/nb/azsent/network/test_ip_summary.py create mode 100644 tests/testdata/az_net_df.pkl create mode 100644 tests/testdata/az_net_if_df.pkl create mode 100644 tests/testdata/azure_activity_df.pkl create mode 100644 tests/testdata/host_hb_df.pkl create mode 100644 tests/testdata/vmcomputer_df.pkl diff --git a/docs/source/conf.py b/docs/source/conf.py index 7568846..85c07d4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,9 +24,9 @@ sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- - +# pylint: disable=redefined-builtin, invalid-name project = "msticnb" -# pylint: disable=redefined-builtin + copyright = "2020, (c) Microsoft Corporation." 
author = "Ian Hellen, Pete Bryan" @@ -78,7 +78,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] +exclude_patterns: list = [] # The name of the Pygments (syntax highlighting) style to use. pygments_style = None @@ -127,7 +127,7 @@ # -- Options for LaTeX output ------------------------------------------------ -latex_elements = { +latex_elements: dict = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', diff --git a/docs/source/readme.md b/docs/source/readme.md new file mode 100644 index 0000000..7411b54 --- /dev/null +++ b/docs/source/readme.md @@ -0,0 +1,537 @@ +Creating Notebooklets +===================== + +Most of the process of creating a notebook is documented in the +[nb_template](https://github.com/microsoft/msticnb/blob/master/msticnb/nb/template/nb_template.py) +module. You can use this as a starting point for creating a notebooklet. + +Notebooklets have two components: + +- A python module containing the code that does all of the processing + work that you\'d normally write directly into notebook cells. +- A yaml file that contains configuration, documentation and text + content that you want to display as part of your notebooklet\'s + output. + +Custom notebooklets must be in a package of their own (although you can +have multiple notebooklets in the same package) so also require an +`__init__.py` in the same folder. + +Notebooklets are loaded by calling `nb.discover_modules()` function and +specifying the path to the notebooklets package with the `nb_path` +parameter. (see +:py`discover_modules`{.interpreted-text +role="func"}) + +Notebooklet module +------------------ + +The notebooklet module has three main sections: + +- **Result class definition**: This defines the attributes and + descriptions of the data that you want to return from the + notebooklet. 
+- **Notebooklet class definition**: This is the entry point for + running the notebooklet. At minimum it should be a class derived + from Notebooklet that implements a [run]{.title-ref} method and + returns your result class. +- **Functions**: These do most of the work of the notebooklet and + usually the code that is copied from or adapted from the original + notebook. + +Having the latter section is optional. You can choose to implement this +functionality in instance methods of the notebooklet class. + +However, there are advantages to keeping these as separate functions +outside the class. It means that all the data used in the functions has +to be passed around as parameters and return values. This can improve +the clarity of the code and reduce errors due to some dependency on some +mysterious global state. + +If the user of your notebooklet wants to import the module\'s code into +a notebook to read and possibly adapt it, having standalone functions +will make it easier from them understand and work with the code. + +### Results Class + +This is derived from the +:py`NotebookletResult`{.interpreted-text +role="class"} It is also an [attrs class](https://www.attrs.org) so +needs to be decorated with the \@attr decorator. + +``` {.python} +@attr.s(auto_attribs=True) +class TemplateResult(NotebookletResult): + """ + Template Results. + + Attributes + ---------- + all_events : pd.DataFrame + DataFrame of all raw events retrieved. + plot : bokeh.models.LayoutDOM + Bokeh plot figure showing the account events on an + interactive timeline. + additional_info: dict + Additional information for my notebooklet. + + """ + + description: str = "Windows Host Security Events" + + # Add attributes as needed here. + # Make sure they are documented in the Attributes section + # above. 
+ all_events: pd.DataFrame = None + plot: Figure = None + additional_info: Optional[dict] = None +``` + +The class is just a collection of attributes containing results that you +want to return to the user. It is a good idea to add type hints that +define what data type each attribute contains. Adding documentation for +each attribute is important. This not only helps when reading the code +or using the Python help() function but it is also used to automatically +generate titles and descriptive text when you display the results class. + +### The Notebooklet class + +The notebooklet class is the main engine behind a notebooklet. It is +derived from +:py`Notebooklet`{.interpreted-text +role="class"} + +``` {.python} +class TemplateNB(Notebooklet): + """ + Template Notebooklet class. + + Detailed description of things this notebooklet does: + + - Fetches all events from XYZ + - Plots interesting stuff + - Returns extended metadata about the thing + + Document the options that the Notebooklet takes, if any, + Use these control which parts of the notebooklet get run. + + """ + # assign metadata from YAML to class variable + metadata = _CLS_METADATA + __doc__ = nb_metadata.update_class_doc(__doc__, metadata) + _cell_docs = _CELL_DOCS +``` + +The first section of the the class definition contains the docstring. +This documentation is used by the notebooklet browser and the +show_help() function to provide extended user-friendly help. + +The first three lines of code handle assiging metadata and documentation +data from the notebooklet yaml file (see below) so that the notebooklet +code can access it. + +::: {.warning} +::: {.title} +Warning +::: + +Do not change these lines unless you know what you are doing. +::: + +#### The run method + +:py`Notebooklet.run`{.interpreted-text +role="func"} + +The next section is the all-important `run` method. This method is the +main entry point to the notebooklet and controls the flow of most of the +logic. 
You can add other methods to do subsequent tasks but you should +always implement a run method. + +``` {.python} +# @set_text decorator will display the title and text every time +# this method is run. +# The key value refers to an entry in the `output` section of +# the notebooklet yaml file. +@set_text(docs=_CELL_DOCS, key="run") +def run( + self, + value: Any = None, + data: Optional[pd.DataFrame] = None, + timespan: Optional[TimeSpan] = None, + options: Optional[Iterable[str]] = None, + **kwargs, +) -> TemplateResult: + """ + Return XYZ summary. + + Parameters + ---------- + value : str + Host name - The key for searches - e.g. host, account, IPaddress + data : Optional[pd.DataFrame], optional + Alternatively use a DataFrame as input. + timespan : TimeSpan + Timespan for queries + options : Optional[Iterable[str]], optional + List of options to use, by default None. + A value of None means use default options. + Options prefixed with "+" will be added to the default options. + To see the list of available options type `help(cls)` where + "cls" is the notebooklet class or an instance of this class. + + Returns + ------- + TemplateResult + Result object with attributes for each result type. + + Raises + ------ + MsticnbMissingParameterError + If required parameters are missing + + """ +``` + +Most of this is class documentation - again this is used in the browser +and user help so you should document this as shown. Usually you can just +copy and paste this example and edit the text to suit your needs - for +example, changing the description `value` if you are expecting an IP +address. + +Do not rename or add to these explicit parameters since they are +referenced by the base class. If you want additional parameters you can +supply them as keyword arguments and extract them from kwargs. Be sure +to document any keyword arguments that you require. + +#### The set_text decorator + +The `@set_text` decorator requires some explanation. 
This decorator +gives you the ability to output display text every time `run()` is +called. It references the \_CELL_DOCS dictionary, which is read from the +yaml metadata file, and specifies a key which is used to look up the +exact section from the file to use. + +You can optionally add explicit title and text as parameters to +`set_text` using the `title`, `text` and `hd_level` parameters. This is +documented here :py`set_text`{.interpreted-text +role="func"} + +The set_text decorator does not display any text if you run the +notebooklet with `silent=True` parameter. + +#### The run method body + +``` {.python} +# This line use logic in the superclass to populate options +# (including default options) into this class. +super().run( + value=value, data=data, timespan=timespan, options=options, **kwargs +) +``` + +Calling the base class `run` method from your implementation is +important. This does things like handle options and optionall convert +and normalize the timespan parameter. + +The next section validates any input parameters that you require and +creates a results class to store your output data. Assigning the +description and the timespan being used to the results object is very +helpful when you need to refer back to the result or possibly make +additional ad hoc queries afterwards. + +``` {.python} +if not value: + raise MsticnbMissingParameterError("value") +if not timespan: + raise MsticnbMissingParameterError("timespan.") + +# Create a result class +result = TemplateResult() +result.description = self.metadata.description +result.timespan = timespan +``` + +The remainder of the run method is just about the logic of what you want +to execute and in what order. + +::: {.note} +::: {.title} +Note +::: + +be sure to assign your results class to `self._last_result`. This will +expose the result class as a `result` property of your notebooklet +instance and allow other methods in your class to reference it. 
+::: + +``` {.python} +# You might want to always do some tasks irrespective of +# options sent +all_events_df = _get_all_events( + self.query_provider, host_name=value, timespan=timespan +) +result.all_events = all_events_df + +if "plot_events" in self.options: + result.plot = _display_event_timeline(acct_event_data=all_events_df) + +if "get_metadata" in self.options: + result.additional_info = _get_metadata(host_name=value, timespan=timespan) + +# Assign the result to the _last_result attribute +# so that you can get to it without having to re-run the operation +self._last_result = result # pylint: disable=attribute-defined-outside-init + +return self._last_result +``` + +You can call additional methods unconditionally or use the option logic +to allow users to add additional operations or skip ones that they are +not interested in. The available and default options for your +notebooklet are defined in the notebooklet yaml file. + +If you call run() without specifying the options parameter, the defaults +will be used. You can specify a custom set of options as a list of +option names (strings). + +`options=["opt1", "opt2", "opt4"]` + +You can also specify an incremental list. For example: + +- `options=["+option_a"]` will add \"option_a\" to the list of default + options. +- `options=["+option_a", "-option_b"]` will add \"option_a\" and + remove \"option_b\" from the defaults. + +::: {.note} +::: {.title} +Note +::: + +You cannot mix the explicit options with the incremental options syntax. +::: + +Be sure to assign the output from the called functions to the relevant +attributes of your result and return the result at the end. + +#### Additional notebooklet methods + +Often you will not want to or not be able to execute additional +functionality within the run command. 
You may require the user to choose +an option before starting a second step or you may want to provide some +kind of data browsing capability that is interactive and needs to the +run method to have completed. + +You can do this by adding methods to your notebooklet class. Any public +methods you create will be added to the auto-documentation of the +notebooklet. + +This is an example method. Note that if you depend on the result being +populated, you should check this and issue a warning if it is not (as +shown). + +``` {.python} +def run_additional_operation( + self, event_ids: Optional[Union[int, Iterable[int]]] = None +) -> pd.DataFrame: + """ + Addition method. + + Parameters + ---------- + event_ids : Optional[Union[int, Iterable[int]]], optional + Single or interable of event IDs (ints). + + Returns + ------- + pd.DataFrame + Results with expanded columns. + + """ + # Include this to check the "run()" has happened before this method + # can be run + if ( + not self._last_result or self._last_result.all_events is None + ): # type: ignore + print( + "Please use 'run()' to fetch the data before using this method.", + "\nThen call 'expand_events()'", + ) + return None + # Print a status message - this will not be displayed if + # the user has set the global "verbose" option to False. + nb_print("We maybe about to wait some time") + + nb_markdown("Print some message that always displays", "blue, bold") + return _do_additional_thing( + evt_df=self._last_result.all_events, # type: ignore + event_ids=event_ids, + ) + # Note you can also assign new items to the result class in + # self._last_result and return the updated result class. +``` + +One thing to note here is the use of +:py`nb_markdown`{.interpreted-text +role="func"} and +:py`nb_print`{.interpreted-text role="func"} +(there is also an +:py`nb_display`{.interpreted-text +role="func"} function). These are simple wrappers around +IPython.display.markdown(), Python print() and +IPython.display.display(). 
These functions honor the `silent` parameter. +This can be supplied to the notebooklet `__init__` method (when creating +an instance of the class) or the `run` method. If silent is True then +these functions do not display any output. You are free to use whatever +output functions you choose but the notebooklet may produce unexpected +output if the user has set the silent option to True. + +::: {.note} +::: {.title} +Note +::: + +You can access `self.silent` to query the current setting. You can also +set the silent option globally by using `nb.set_opt("silent", True)` +(see :py`set_opt`{.interpreted-text +role="func"}) +::: + +### Worker Functions + +To keep the notebooklet class simple, most of the work done by the +notebooklet is usually coded in separate module functions. These are +usually declares as private functions by prefixing with \"\_\" + +This simple function executes a query and returns the results. The query +provider, hostname and timespan are supplied in the call from the +notebooklet run method. + +``` {.python3} +def _get_all_events(qry_prov, host_name, timespan): + # Tell the user that you're fetching data + # (doesn't display if nb.set_opt("silent", True)) + nb_data_wait("SecurityEvent") + return qry_prov.WindowsSecurity.list_host_events( + timespan, + host_name=host_name, + add_query_items="| where EventID != 4688 and EventID != 4624", + ) +``` + +:py`nb_data_wait`{.interpreted-text +role="func"} just outputs a standard message telling the user that data +is being retrieved. + +This is another example showing the use of the `@set_text` decorator. +The output from this will be displayed as the plot is shown. The plot +layout object is returned to the notebooklet class and added to the +results class (shown earlier). 
+ +``` {.python3} +@set_text(docs=_CELL_DOCS, key="display_event_timeline") +def _display_event_timeline(acct_event_data): + # Plot events on a timeline + + # Note the nbdisplay function is a wrapper around IPython.display() + # However, it honors the "silent" option (global or per-notebooklet) + # which allows you to suppress output while running. + return nbdisplay.display_timeline( + data=acct_event_data, + group_by="EventID", + source_columns=["Activity", "Account"], + legend="right", + ) +``` + +Notebook YAML file +------------------ + +The notebooklet yaml file should have the same name as the Python module +but with a \"yaml\" or \"yml\" extension. + +There are two main sections: `metadata` and `output`. + +``` {.YAML} +metadata: + name: TemplateNB + description: Template YAML for Notebooklet + default_options: + - all_events: Gets all events about blah + - plot_events: + Display and summary and timeline of events. + other_options: + - get_metadata: fetches additional metadata about the entity + keywords: + - host + - computer + - heartbeat + - windows + - account + entity_types: + - host + req_providers: + - AzureSentinel|LocalData + - tilookup +``` + +The metadata section defines runtime parameters for the notebooklet. +These include: + +- the notebooklet display name +- the notebooklet description +- the default options (a list of key/value pairs of option name and + description) +- other options available +- keywords (used in searching for the notebooklet +- entity types - mainly informational so that a user can find all + notebooklets that deal with hosts, IP addresses, etc. +- req_providers - this is a list of data providers required for the + notebooklet to run. You can provide alternates (as shown), which + means that if one of the providers is available the notebooklet will + load successfully. 
+ +``` {.YAML} +output: + run: + title: Title for the run method (main title) + hd_level: 1 + text: + Write your introductory text here + + Data and plots are stored in the result class returned by this function. + + If you use **markdown** syntax in this block add the following + to use markdown processing. + md: True + display_event_timeline: + title: Display the timeline. + text: ' + This may take some time to complete for large numbers of events. + + It will do: + - Item one + - Item two + + Since some groups will be undefined these can show up as `NaN`. + + Note: use a quoted string if you want to include yaml reserved chars + such as ":" + ' + md: True +``` + +The output section defines the display text for the `@set_text` +decorator function used in the notebooklet module. The key for each +section under output must match the value for the `key` parameter in the +call to `set_text`. + +Each section has the following sub-keys: + +- title: the title to display (by default as HTML h2 or Markdown + \"\#\#\") +- hd_level: (1-4) to override the default heading level +- text: the body text to display. This will display as plain text by + default +- md: set to True to process the \"text\" value as Markdown. 
diff --git a/msticnb/__init__.py b/msticnb/__init__.py index 70a393c..31fa18b 100644 --- a/msticnb/__init__.py +++ b/msticnb/__init__.py @@ -8,6 +8,7 @@ To start using notebooklets: >>> import msticnb as nb +>>> # optionally give a query provider nb.init(query_provider=qry_prov) >>> nb.init() >>> >>> # Auto-complete tree of notebooklets @@ -39,6 +40,7 @@ from .read_modules import discover_modules, nblts, nb_index, find # noqa:F401 from .options import get_opt, set_opt # noqa:F401 from .nb_browser import NBBrowser # noqa:F401 +from .nb_pivot import add_pivot_funcs # noqa:F401 from ._version import VERSION @@ -47,4 +49,4 @@ # pylint: disable=invalid-name browse = NBBrowser discover_modules() -print(len(list(nblts.iter_classes())), "notebooklets loaded.") +print(f"Notebooklets: {len(list(nblts.iter_classes()))} notebooklets loaded.") diff --git a/msticnb/_version.py b/msticnb/_version.py index ca5e4cd..8e206b1 100644 --- a/msticnb/_version.py +++ b/msticnb/_version.py @@ -1,2 +1,2 @@ """Version file.""" -VERSION = "0.1.0" +VERSION = "0.2.0" diff --git a/msticnb/common.py b/msticnb/common.py index e04b0e6..eb43739 100644 --- a/msticnb/common.py +++ b/msticnb/common.py @@ -7,7 +7,9 @@ import functools from typing import Union, Optional, Iterable, Tuple, Any, List, Dict +import bokeh.io from IPython.display import display, HTML +from IPython import get_ipython from markdown import markdown from msticpy.common import utility as mp_utils @@ -20,6 +22,9 @@ __author__ = "Ian Hellen" +_IP_AVAILABLE = get_ipython() is not None + + class NBContainer: """Container for Notebooklet classes.""" @@ -63,9 +68,9 @@ def iter_classes(self) -> Iterable[Tuple[str, Any]]: yield key, val -def nb_print(mssg: Any): +def nb_print(*args): """ - Print a status message. + Print output but suppress if "silent". 
Parameters ---------- @@ -74,7 +79,7 @@ def nb_print(mssg: Any): """ if get_opt("verbose") and not get_opt("silent"): - print(mssg) + print(*args) def nb_data_wait(source: str): @@ -87,7 +92,7 @@ def nb_data_wait(source: str): The data source. """ - nb_print(f"Getting data from {source}...") + nb_markdown(f"Getting data from {source}...") def nb_debug(*args): @@ -101,13 +106,19 @@ def nb_debug(*args): def nb_markdown(*args, **kwargs): """Display Markdown/HTML text.""" if not get_opt("silent"): - mp_utils.md(*args, **kwargs) + if _IP_AVAILABLE: + mp_utils.md(*args, **kwargs) + else: + nb_print(*args) def nb_warn(*args, **kwargs): """Display Markdown/HTML warning text.""" if not get_opt("silent"): - mp_utils.md_warn(*args, **kwargs) + if _IP_AVAILABLE: + mp_utils.md_warn(*args, **kwargs) + else: + nb_print("WARNING:", *args) def nb_display(*args, **kwargs): @@ -227,6 +238,22 @@ def add_results(*args, **kwargs): return result_wrapper +def show_bokeh(plot): + """Display bokeh plot, resetting output.""" + try: + bokeh.io.reset_output() + bokeh.io.output_notebook(hide_banner=True) + bokeh.io.show(plot) + except RuntimeError: + bokeh.io.output_notebook(hide_banner=True) + bokeh.io.show(plot) + + +def df_has_data(data) -> bool: + """Return True if `data` DataFrame has data.""" + return data is not None and not data.empty + + class MsticnbError(Exception): """Generic exception class for Notebooklets.""" diff --git a/msticnb/data_providers.py b/msticnb/data_providers.py index c066d28..1dc581b 100644 --- a/msticnb/data_providers.py +++ b/msticnb/data_providers.py @@ -4,23 +4,23 @@ # license information. 
# -------------------------------------------------------------------------- """Data Providers class and init function.""" -from collections import namedtuple import inspect -from typing import Optional, List, Dict, Any, Iterable, Tuple import sys +from collections import namedtuple +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from msticpy.common.exceptions import MsticpyAzureConfigError +from msticpy.common.wsconfig import WorkspaceConfig from msticpy.data import QueryProvider -from msticpy.data.query_defns import DataEnvironment from msticpy.data.azure_data import AzureData -from msticpy.common.wsconfig import WorkspaceConfig -from msticpy.common.exceptions import MsticpyAzureConfigError -from msticpy.sectools import TILookup, GeoLiteLookup, IPStackLookup +from msticpy.data.query_defns import DataEnvironment +from msticpy.datamodel.pivot import Pivot +from msticpy.sectools import GeoLiteLookup, IPStackLookup, TILookup +from ._version import VERSION from .common import MsticnbDataProviderError, MsticnbError from .options import get_opt -from ._version import VERSION - __version__ = VERSION __author__ = "Ian Hellen" @@ -53,8 +53,8 @@ def __call__(self, *args, **kwargs): """Overide the __call__ method for the wrapper class.""" if ( self.instance is None - or self.instance.kwargs != kwargs - or self.instance.args != args + or getattr(self.instance, "kwargs", None) != kwargs + or getattr(self.instance, "args", None) != args ): self.instance = self.wrapped_cls(*args, **kwargs) self.instance.kwargs = kwargs @@ -83,12 +83,12 @@ def __getattr__(self, name): class DataProviders: """Notebooklet DataProviders class.""" - _default_providers = ["tilookup", "geolitelookup"] - _other_providers: List[str] = ["ipstacklookup"] + _DEFAULT_PROVIDERS = ["tilookup", "geolitelookup"] + _OTHER_PROVIDERS = ["ipstacklookup"] def __init__( self, - query_provider: str = "AzureSentinel", + query_provider: Union[str, QueryProvider] = "AzureSentinel", providers: 
Optional[List[str]] = None, **kwargs, ): @@ -97,8 +97,9 @@ def __init__( Parameters ---------- - query_provider : str, optional + query_provider : Union[str, QueryProvider], optional DataEnvironment name of the primary query provider, + or an instance of an existing query provider, by default "AzureSentinel" providers : Optional[List[str]], optional A list of provider names to load. @@ -125,31 +126,20 @@ def __init__( """ self.provider_names: set = self._get_custom_providers(providers) - parsed_provider = DataEnvironment.parse(query_provider) - if parsed_provider == DataEnvironment.Unknown: - known_providers = set(DataEnvironment.__members__.keys()) - { - "Unknown", - "Kusto", - "AzureSecurityCenter", - } - raise MsticnbDataProviderError( - f"Unknown query provider '{query_provider}", - f"Available providers are {', '.join(known_providers)}", - ) - self.provider_names.add(parsed_provider.name) + self.provider_classes: Dict[str, ProviderDefn] = self._create_provider_defns() self.providers: Dict[str, Any] = {} - self.provider_classes: Dict[str, ProviderDefn] = { - "azuresentinel": ProviderDefn(QueryProvider, True, self._azsent_get_config), - "queryprovider": ProviderDefn(QueryProvider, True, None), - "azuredata": ProviderDefn(AzureData, True, None), - "tilookup": ProviderDefn(TILookup, False, None), - "geolitelookup": ProviderDefn(GeoLiteLookup, False, None), - "ipstacklookup": ProviderDefn(IPStackLookup, False, None), - } - self.provider_classes["loganalytics"] = self.provider_classes["azuresentinel"] self.query_provider = None - + if isinstance(query_provider, str): + parsed_provider = self._parse_provider_name(query_provider) + self.provider_names.add(parsed_provider.name) + elif isinstance(query_provider, QueryProvider): + # If this is a query provider instance, just add it directly + self.query_provider = query_provider + parsed_provider = query_provider.environment + self.providers[parsed_provider] = query_provider + + # Go through list of providers, 
instantiating and connecting them. for provider in sorted(self.provider_names): try: self.add_provider(provider, **kwargs) @@ -157,8 +147,12 @@ def __init__( print(f"Data provider {provider} could not be added.") print(err.args) else: - if provider in self.providers and provider == parsed_provider.name: - # If this is the default query provider + if ( + provider in self.providers + and not self.query_provider + and provider == parsed_provider.name + ): + # This is the default query provider setattr(self, "query_provider", self.providers[provider]) def __getitem__(self, key: str): @@ -170,11 +164,42 @@ def __getitem__(self, key: str): return self.providers[alt_key] raise KeyError(key, "not found") + def _create_provider_defns(self): + """Definitions for provider construction.""" + defns = { + "azuresentinel": ProviderDefn(QueryProvider, True, self._azsent_get_config), + "queryprovider": ProviderDefn(QueryProvider, True, None), + "azuredata": ProviderDefn(AzureData, True, None), + "tilookup": ProviderDefn(TILookup, False, None), + "geolitelookup": ProviderDefn(GeoLiteLookup, False, None), + "ipstacklookup": ProviderDefn(IPStackLookup, False, None), + } + # Add loganalytics as an alias for azuresentinel + defns["loganalytics"] = defns["azuresentinel"] + return defns + + @staticmethod + def _parse_provider_name(query_provider): + parsed_provider = DataEnvironment.parse(query_provider) + # If we weren't able to match the name to a known provider, raise exception. 
+ if parsed_provider == DataEnvironment.Unknown: + known_providers = set(DataEnvironment.__members__.keys()) - { + "Unknown", + "Kusto", + "AzureSecurityCenter", + } + raise MsticnbDataProviderError( + f"Unknown query provider '{query_provider}", + f"Available providers are {', '.join(known_providers)}", + ) + return parsed_provider + def _get_custom_providers(self, providers): - requested_provs = set(self._default_providers) + requested_provs = set(self._DEFAULT_PROVIDERS) if not providers: return requested_provs + providers = [prov.casefold() for prov in providers] add_provs = {opt[1:] for opt in providers if opt.startswith("+")} sub_provs = {opt[1:] for opt in providers if opt.startswith("-")} std_provs = {opt for opt in providers if opt[0] not in ("+", "-")} @@ -292,8 +317,8 @@ def list_providers(cls) -> List[str]: """ providers = list(DataEnvironment.__members__.keys()) providers.remove("Unknown") - providers.extend(cls._default_providers) - providers.extend(cls._other_providers) + providers.extend(cls._DEFAULT_PROVIDERS) + providers.extend(cls._OTHER_PROVIDERS) return providers # Provider initializers @@ -309,7 +334,7 @@ def get_def_providers(cls) -> List[str]: List of default providers. """ - return cls._default_providers + return cls._DEFAULT_PROVIDERS def _query_prov(self, provider, provider_defn, **kwargs): try: @@ -403,7 +428,7 @@ def _azsent_get_config(**kwargs): def init( - query_provider: str = "LogAnalytics", + query_provider: str = "AzureSentinel", providers: Optional[List[str]] = None, **kwargs, ): @@ -414,10 +439,11 @@ def init( ---------- query_provider : str, optional DataEnvironment name of the primary query provider. + By default, "AzureSentinel". You can add addtional query providers by including them in the `providers` list. 
providers : Optional[List[str]], optional - A list of provider names, by default "LogAnalytics" + A list of provider names, by default None Other Parameters ---------------- @@ -441,6 +467,14 @@ def init( """ d_provs = DataProviders(query_provider, providers, **kwargs) - print(f"Loaded providers: {', '.join(d_provs.providers.keys())}") + print(f"Notebooklets: Loaded providers: {', '.join(d_provs.providers.keys())}") msticnb = sys.modules["msticnb"] setattr(msticnb, "data_providers", d_provs.providers) + + if Pivot.current: + # We have to import add_pivot_functions here since it introduces + # a circular import chain if imported at the module level. + # pylint: disable=import-outside-toplevel + from .nb_pivot import add_pivot_funcs + + add_pivot_funcs(Pivot.current) diff --git a/msticnb/nb/azsent/account/account_summary.py b/msticnb/nb/azsent/account/account_summary.py index 109a93a..0a1b299 100644 --- a/msticnb/nb/azsent/account/account_summary.py +++ b/msticnb/nb/azsent/account/account_summary.py @@ -8,7 +8,6 @@ from typing import Any, Callable, Dict, Iterable, Optional, Union import pandas as pd -from bokeh.io import show from bokeh.models import LayoutDOM from IPython.display import HTML from msticpy.common.timespan import TimeSpan @@ -23,7 +22,10 @@ nb_display, nb_markdown, set_text, + show_bokeh, + df_has_data, ) +from ....nblib.azsent.alert import browse_alerts from ....notebooklet import NBMetadata, Notebooklet, NotebookletResult __version__ = VERSION @@ -39,12 +41,14 @@ class AccountType(Flag): """Account types.""" + # pylint: disable=invalid-name AzureActiveDirectory = auto() AzureActivity = auto() Office365 = auto() Windows = auto() Linux = auto() Azure = AzureActiveDirectory | AzureActivity | Office365 + # pylint: enable=invalid-name def in_list(self, acct_types: Iterable[Union["AccountType", str]]): """Is the current value in the `acct_types` list.""" @@ -253,6 +257,10 @@ def run( self.display_alert_timeline() else: # if multiple, create a selector + 
nb_markdown("
") + nb_markdown( + "Multiple matching accounts found, select one to see details.", "large" + ) result.account_selector = _get_account_selector( qry_prov=self.query_provider, all_acct_dfs=all_acct_dfs, @@ -262,6 +270,15 @@ def run( ) nb_display(result.account_selector) + if not acct_index_df.empty: + nb_markdown("
") + nb_markdown( + "

Use result.notebooklet.get_additional_data()" + + " to retrieve more data." + ) + nb_markdown( + f"Additional methods for this class:
{'
'.join(self.list_methods())}" + ) # Assign the result to the _last_result attribute # so that you can get to it without having to re-run the operation self._last_result = result # pylint: disable=attribute-defined-outside-init @@ -272,51 +289,62 @@ def run( def display_alert_timeline(self): """Display the alert timeline.""" - if ( - self._last_result is not None - and self._last_result.alert_timeline is not None - ): - show(self._last_result.alert_timeline) + if self.check_valid_result_data("related_alerts"): + return _get_alerts_timeline(self._last_result.related_alerts, silent=False) + return None def browse_accounts(self) -> nbwidgets.SelectItem: """Return the accounts browser/viewer.""" - if ( - self._last_result is not None - and self._last_result.account_selector is not None - ): + if self.check_valid_result_data("account_selector"): return self._last_result.account_selector return None def browse_alerts(self) -> nbwidgets.SelectAlert: """Return alert browser/viewer.""" - if ( - self._last_result is not None - and self._last_result.related_alerts is not None - and not self._last_result.related_alerts.empty - ): - if "CompromisedEntity" not in self._last_result.related_alerts: - self._last_result.related_alerts["CompromisedEntity"] = "n/a" - if "StartTimeUtc" not in self._last_result.related_alerts: - self._last_result.related_alerts[ - "StartTimeUtc" - ] = self._last_result.related_alerts["TimeGenerated"] - return nbwidgets.SelectAlert( - alerts=self._last_result.related_alerts, action=nbdisplay.format_alert - ) + if self.check_valid_result_data("related_alerts"): + return browse_alerts(self._last_result) return None def browse_bookmarks(self) -> nbwidgets.SelectItem: """Return bookmark browser/viewer.""" - if ( - self._last_result is not None - and self._last_result.related_bookmarks is not None - and not self._last_result.related_bookmarks.empty - ): + if self.check_valid_result_data("related_bookmarks"): return 
_get_bookmark_select(self._last_result.related_bookmarks) return None + def az_activity_timeline_by_provider(self): + """Display Azure activity timeline by provider.""" + if self.check_valid_result_data("azure_activity"): + return _plot_timeline_by_provider(self._last_result.azure_activity) + return None + + def az_activity_timeline_by_ip(self): + """Display Azure activity timeline by IP address.""" + if self.check_valid_result_data("azure_activity"): + return _plot_timeline_by_ip(self._last_result.azure_activity) + return None + + def az_activity_timeline_by_operation(self): + """Display Azure activity timeline by operation.""" + if self.check_valid_result_data("azure_activity"): + return _plot_timeline_by_operation(self._last_result.azure_activity) + return None + + def host_logon_timeline(self): + """Display Azure activity timeline by operation.""" + if self.check_valid_result_data("host_logons"): + _, source = self._get_selected_account() + ip_col = ( + "SourceIP" + if AccountType.parse(source) == AccountType.Linux + else "IpAddress" + ) + return _create_host_timeline( + self._last_result.host_logons, ip_col=ip_col, silent=False + ) + return None + @set_text(docs=_CELL_DOCS, key="find_additional_data") - def find_additional_data(self) -> pd.DataFrame: + def get_additional_data(self) -> pd.DataFrame: """ Find additional data for the selected account. @@ -326,11 +354,7 @@ def find_additional_data(self) -> pd.DataFrame: Results with expanded columns. 
""" - if self._last_result is None: - print( - "Please use 'run()' to fetch the data before using this method.", - "\nThen select an account to examine and run 'find_additional_data()'", - ) + if not self.check_valid_result_data(): return acct, source = self._get_selected_account() if not acct or not source: @@ -476,7 +500,7 @@ def _get_matching_accounts(qry_prov, timespan, account, account_types): nb_markdown(f" {len(linux_logon_df)} records in Linux logon data") account_dfs[AccountType.Linux] = linux_logon_df - nb_markdown(f"Found {rec_count} total recordsmsticnb.") + nb_markdown(f"Found {rec_count} total records.") return account_dfs @@ -560,7 +584,7 @@ def display_account(selected_item: str): # Create entity acct_entity = _create_account_entity(account_name, acct_type, all_acct_dfs) - outputs.append(HTML("

Alert Entity

")) + outputs.append(HTML("

Account Entity

")) result.account_entity = acct_entity outputs.append(acct_entity) # Add account activity @@ -573,7 +597,7 @@ def display_account(selected_item: str): related_alerts = _get_related_alerts(qry_prov, account_name, timespan) result.related_alerts = related_alerts outputs.append(_get_related_alerts_summary(related_alerts)) - if related_alerts is not None and not related_alerts.empty: + if df_has_data(related_alerts): result.alert_timeline = _get_alerts_timeline(related_alerts) if "get_bookmarks" in options: related_bkmarks = _get_related_bookmarks(qry_prov, account_name, timespan) @@ -695,14 +719,14 @@ def _get_related_alerts( ) -def _get_alerts_timeline(related_alerts: pd.DataFrame) -> LayoutDOM: +def _get_alerts_timeline(related_alerts: pd.DataFrame, silent=True) -> LayoutDOM: """Return alert timeline.""" return nbdisplay.display_timeline( data=related_alerts, title="Alerts", source_columns=["AlertName"], height=300, - hide=True, + hide=silent, ) @@ -721,11 +745,15 @@ def _get_related_alerts_summary(related_alerts: pd.DataFrame): f"related to this account", ] + total_alerts = 0 for (name, count) in alert_items.items(): output.append(f"- {name}, # Alerts: {count}") - output.append( - "
To show the alert timeline call the display_alert_time() method." - ) + total_alerts += count + + if total_alerts > 1: + output.append( + "
To show the alert timeline call the display_alert_timeline() method." + ) output.append("To browse the alerts call the browse_alerts() method.") return HTML("
".join(output)) @@ -872,30 +900,47 @@ def _get_azure_add_activity(qry_prov, acct, timespan): @set_text(docs=_CELL_DOCS, key="create_az_timelines") def _create_azure_timelines(az_all_data: pd.DataFrame, silent: bool = False): - timeline_by_provider = nbdisplay.display_timeline( + return ( + _plot_timeline_by_provider(az_all_data, silent), + _plot_timeline_by_ip(az_all_data, silent), + _plot_timeline_by_operation(az_all_data, silent), + ) + + +def _plot_timeline_by_provider(az_all_data, silent=False): + return nbdisplay.display_timeline( data=az_all_data, group_by="AppResourceProvider", source_columns=["Operation", "IPAddress", "AppResourceProvider"], title="Azure account activity by Provider", hide=silent, ) - timeline_by_ip = nbdisplay.display_timeline( + + +def _plot_timeline_by_ip(az_all_data, silent=False): + return nbdisplay.display_timeline( data=az_all_data, group_by="IPAddress", - source_columns=["Operation", "IPAddress", "AppResourceProvider"], + source_columns=[ + "AppResourceProvider", + "Operation", + "IPAddress", + "AppResourceProvider", + ], title="Azure Operations by Source IP", hide=silent, ) - timeline_by_operation = nbdisplay.display_timeline( + + +def _plot_timeline_by_operation(az_all_data, silent=False): + return nbdisplay.display_timeline( data=az_all_data, group_by="Operation", - source_columns=["Operation", "IPAddress", "AppResourceProvider"], + source_columns=["AppResourceProvider", "Operation", "IPAddress"], title="Azure Operations by Operation", hide=silent, ) - return (timeline_by_provider, timeline_by_ip, timeline_by_operation) - @set_text(docs=_CELL_DOCS, key="summarize_azure_activity") def _summarize_azure_activity(az_all_data: pd.DataFrame): diff --git a/msticnb/nb/azsent/account/account_summary.yaml b/msticnb/nb/azsent/account/account_summary.yaml index 23bc32a..78468fd 100644 --- a/msticnb/nb/azsent/account/account_summary.yaml +++ b/msticnb/nb/azsent/account/account_summary.yaml @@ -49,12 +49,12 @@ output: title: Summary of host 
logon activity. text: ' Shows the total number of logons attempts by host. - FailedLogons shows the breakdown of successfull and failed + FailedLogons shows the breakdown of successfully and failed logons. IPAddresses is a list of distinct source IP addresses for the logons. LogonTypeCount breaks down the logon type used by count. - First and LastLogon shows the earliest and lastest logons + First and LastLogon shows the earliest and latest logons on each host by this account in the selected time range. ' md: True @@ -77,6 +77,7 @@ output: - Application resource provider - User type ' + md: True create_azure_timelines: title: Azure activity timelines hd_level: 3 diff --git a/msticnb/nb/azsent/network/ip_summary.py b/msticnb/nb/azsent/network/ip_summary.py new file mode 100644 index 0000000..4ea5698 --- /dev/null +++ b/msticnb/nb/azsent/network/ip_summary.py @@ -0,0 +1,680 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +"""IP Address Summary notebooklet.""" +from ipaddress import IPv4Address, IPv4Network, IPv6Address, ip_address +import json +from json import JSONDecodeError +from typing import Any, Dict, Iterable, Optional, Union + +import pandas as pd +from bokeh.plotting.figure import Figure +from msticpy.common.timespan import TimeSpan +from msticpy.datamodel.entities import Host, IpAddress, GeoLocation +from msticpy.nbtools import nbwidgets, nbdisplay +from msticpy.sectools.ip_utils import get_ip_type, get_whois_info + +from .... 
import nb_metadata +from ...._version import VERSION +from ....common import ( + MsticnbMissingParameterError, + nb_data_wait, + nb_markdown, + set_text, + nb_display, + df_has_data, +) +from ....nblib.azsent.alert import browse_alerts +from ....nblib.azsent.host import populate_host_entity +from ....nblib.iptools import is_in_vps_net +from ....notebooklet import NBMetadata, Notebooklet, NotebookletResult + +__version__ = VERSION +__author__ = "Ian Hellen" + + +# Read module metadata from YAML +_CLS_METADATA: NBMetadata +_CELL_DOCS: Dict[str, Any] +_CLS_METADATA, _CELL_DOCS = nb_metadata.read_mod_metadata(__file__, __name__) + + +# pylint: disable=too-few-public-methods +# Rename this class +class IpSummaryResult(NotebookletResult): + """ + IPSummary Results. + + Attributes + ---------- + ip_str : str + The input IP address as a string. + ip_address : Optional[Union[IPv4Address, IPv6Address]] + Ip Address Python object + ip_entity : IpAddress + IpAddress entity + ip_origin : str + "External" or "Internal" + host_entity : Host + Host entity associated with IP Address + ip_type : str + IP address type - "Public", "Private", etc. + vps_network : IPv4Network + If this is not None, the address is part of a know VPS network. + geoip : Optional[Dict[str, Any]] + Geo location information as a dictionary. + location : Optional[GeoLocation] + Location entity context object. 
+ whois : pd.DataFrame + WhoIs information for IP Address + whois_nets : pd.DataFrame + List of networks definitions from WhoIs data + heartbeat : pd.DataFrame + Heartbeat record for IP Address or host + az_network_if : pd.DataFrame + Azure Network analytics interface record, if available + vmcomputer : pd.DataFrame + VMComputer latest record + az_network_flows : pd.DataFrame + Azure Network analytics flows for IP, if available + az_network_flows_timeline: Figure + Azure Network analytics flows timeline, if data is available + aad_signins : pd.DataFrame = None + AAD signin activity + azure_activity : pd.DataFrame = None + Azure Activity log entries + azure_activity_summary : pd.DataFrame = None + Azure Activity (AAD and Az Activity) summarized view + office_activity : pd.DataFrame = None + Office 365 activity + related_alerts : pd.DataFrame + Alerts related to IP Address + related_bookmarks : pd.DataFrame + Bookmarks related to IP Address + alert_timeline : Figure + Timeline plot of alerts + ti_results: pd.DataFrame + Threat intel lookup results + passive_dns: pd.DataFrame + Passive DNS lookup results + + """ + + def __init__( + self, + description: Optional[str] = None, + timespan: Optional[TimeSpan] = None, + notebooklet: Optional["Notebooklet"] = None, + ): + """ + Create new IPSummaryResult result instance. + + Parameters + ---------- + description : Optional[str], optional + Result description, by default None + timespan : Optional[TimeSpan], optional + TimeSpan for the results, by default None + notebooklet : Optional[, optional + Originating notebooklet, by default None + """ + super().__init__(description, timespan, notebooklet) + self.description: str = "IP Address Summary" + + # Add attributes as needed here. + # Make sure they are documented in the Attributes section + # above. 
+ self.ip_str: str = "" + self.ip_address: Optional[Union[IPv4Address, IPv6Address]] = None + self.ip_entity: IpAddress = None + self.ip_origin: str = "External" + self.ip_type: str = "Public" + self.vps_network: Optional[IPv4Network] = None + self.host_entity: Host = None + self.geoip: Optional[Dict[str, Any]] = None + self.location: Optional[GeoLocation] = None + self.whois: pd.DataFrame = None + self.whois_nets: pd.DataFrame = None + self.heartbeat: pd.DataFrame = None + self.az_network_if: pd.DataFrame = None + self.vmcomputer: pd.DataFrame = None + self.az_network_flows: pd.DataFrame = None + self.az_network_flow_summary: pd.DataFrame = None + self.az_network_flows_timeline: Figure = None + self.aad_signins: pd.DataFrame = None + self.azure_activity: pd.DataFrame = None + self.azure_activity_summary: pd.DataFrame = None + self.office_activity: pd.DataFrame = None + self.related_alerts: pd.DataFrame = None + self.related_bookmarks: pd.DataFrame = None + self.alert_timeline: Figure = None + self.ti_results: pd.DataFrame = None + self.passive_dns: pd.DataFrame = None + + +# pylint: enable=too-few-public-methods + + +# Rename this class +class IpAddressSummary(Notebooklet): + """ + IP Address Summary Notebooklet class. + + Queries and displays summary information about an IP address, including: + + - Basic IP address properties + - IpAddress entity (and Host entity, if a host could be associated) + - WhoIs and Geo-location + - Azure activity and network data (optional) + - Office activity summary (optional) + - TODO + + """ + + # assign metadata from YAML to class variable + metadata = _CLS_METADATA + __doc__ = nb_metadata.update_class_doc(__doc__, metadata) + _cell_docs = _CELL_DOCS + + # @set_text decorator will display the title and text every time + # this method is run. + # The key value refers to an entry in the `output` section of + # the notebooklet yaml file. 
+ @set_text(docs=_CELL_DOCS, key="run") + def run( + self, + value: Any = None, + data: Optional[pd.DataFrame] = None, + timespan: Optional[TimeSpan] = None, + options: Optional[Iterable[str]] = None, + **kwargs, + ) -> IpSummaryResult: + """ + Return XYZ summary. + + Parameters + ---------- + value : str + IP Address - The key for searches + data : Optional[pd.DataFrame], optional + Not supported for this notebooklet. + timespan : TimeSpan + Timespan for queries + options : Optional[Iterable[str]], optional + List of options to use, by default None. + A value of None means use default options. + Options prefixed with "+" will be added to the default options. + To see the list of available options type `help(cls)` where + "cls" is the notebooklet class or an instance of this class. + + Returns + ------- + IpSummaryResult + Result object with attributes for each result type. + + Raises + ------ + MsticnbMissingParameterError + If required parameters are missing + + """ + # This line use logic in the superclass to populate options + # (including default options) into this class. 
+ super().run( + value=value, data=data, timespan=timespan, options=options, **kwargs + ) + + if not value: + raise MsticnbMissingParameterError("value") + if not timespan: + raise MsticnbMissingParameterError("timespan.") + + # Create a result class + result = IpSummaryResult( + notebooklet=self, description=self.metadata.description, timespan=timespan + ) + + result.ip_str = value + result.ip_address = ip_address(value) + result.ip_type = get_ip_type(value) + result.ip_entity = IpAddress(Address=value) + nb_markdown(f"{value}, ip address type: {result.ip_type}") + + if "az_net_if" in self.options and self.check_table_exists( + "AzureNetworkAnalytics_CL" + ): + _get_az_net_if(qry_prov=self.query_provider, src_ip=value, result=result) + if "heartbeat" in self.options and self.check_table_exists("Heartbeat"): + _get_heartbeat(qry_prov=self.query_provider, src_ip=value, result=result) + if "vmcomputer" in self.options and self.check_table_exists("VMComputer"): + _get_vmcomputer(qry_prov=self.query_provider, src_ip=value, result=result) + _populate_host_entity(result) + if not result.host_entity: + result.host_entity = Host(HostName="unknown") + + if "alerts" in self.options: + self._get_related_alerts(src_ip=value, result=result, timespan=timespan) + if "bookmarks" in self.options: + self._get_related_bookmarks(src_ip=value, result=result, timespan=timespan) + if "az_netflow" in self.options: + self._get_azure_netflow(src_ip=value, result=result, timespan=timespan) + if df_has_data(result.az_network_flows): + result.az_network_flow_summary = _summarize_netflows( + result.az_network_flows + ) + nb_display() + if "az_activity" in self.options: + self._get_azure_activity(src_ip=value, result=result, timespan=timespan) + _summarize_azure_activity(result) + if "office_365" in self.options: + self._get_office_activity(src_ip=value, result=result, timespan=timespan) + + result.ip_origin = _determine_ip_origin(result) + + if result.ip_type == "Public": + 
self._get_public_ip_data(src_ip=value, result=result) + + # Assign the result to the _last_result attribute + # so that you can get to it without having to re-run the operation + self._last_result = result # pylint: disable=attribute-defined-outside-init + + nb_markdown("

View the returned results object for more details.

") + nb_markdown( + f"Additional methods for this class:
{'
'.join(self.list_methods())}" + ) + return self._last_result + + def browse_alerts(self) -> nbwidgets.SelectAlert: + """Return alert browser/viewer.""" + if self.check_valid_result_data("related_alerts"): + return browse_alerts(self._last_result, "related_alerts") + return None + + def display_alert_timeline(self): + """Display the alert timeline.""" + if self.check_valid_result_data("related_alerts"): + return nbdisplay.display_timeline( + data=self._last_result.related_alerts, + title="Alerts", + source_columns=["AlertName"], + height=300, + hide=True, + ) + return None + + def browse_ti_results(self): + """Display Threat intel results.""" + if self.check_valid_result_data("ti_results"): + ti_lookup = self.get_provider("tilookup") + return ti_lookup.browse_results(self._last_result.ti_results) + return None + + def netflow_by_protocol(self,) -> Figure: + """Display netflows grouped by protocol.""" + if not self.check_valid_result_data("az_network_flows"): + return None + return _plot_netflow_by_protocol(self._last_result) + + def netflow_total_by_protocol(self,) -> Figure: + """Display netflows grouped by protocol.""" + if not self.check_valid_result_data("az_network_flows"): + return None + return _plot_netflow_values_by_protocol(self._last_result) + + def netflow_by_direction(self,) -> Figure: + """Display netflows grouped by direction.""" + if not self.check_valid_result_data("az_network_flows"): + return None + return _plot_netflow_by_direction(self._last_result) + + @set_text(docs=_CELL_DOCS, key="get_public_ip_data") + def _get_public_ip_data(self, src_ip, result): + """Retrieve data for public IP.""" + _get_whois(src_ip, result) + nb_markdown("WhoIs data") + nb_display(pd.DataFrame(result.whois.iloc[0]).T) + + # GeoIP + if "geoip" in self.options: + geo_lookup = self.get_provider("geolitelookup") or self.get_provider( + "ipstacklookup" + ) + if geo_lookup: + _get_geoip_data(geo_lookup, src_ip, result) + + # TI Lookup + if result.ip_origin == "External" or 
"ti" in self.options: + _get_ti_data(self.get_provider("tilookup"), src_ip, result) + + # Passive DNS + if ( + result.ip_origin == "External" or "passive_dns" in self.options + ) and isinstance(result.ip_address, IPv4Address): + _get_passv_dns(self.get_provider("tilookup"), src_ip, result) + + # VPS Check + vps_net = is_in_vps_net(src_ip) + if vps_net: + nb_markdown(f"IP is part of known VPS network {vps_net}") + result.vps_network = vps_net + else: + nb_markdown("No match for known VPS network") + + @set_text(docs=_CELL_DOCS, key="get_az_netflow") + def _get_azure_netflow(self, src_ip, result, timespan): + """Retrieve Azure netflow and activity events.""" + if self.check_table_exists("AzureNetworkAnalytics_CL"): + _get_az_netflows(self.query_provider, src_ip, result, timespan) + _display_df_summary(result.az_network_flows, "Azure network flows") + + @set_text(docs=_CELL_DOCS, key="get_az_activity") + def _get_azure_activity(self, src_ip, result, timespan): + """Retrieve Azure netflow and activity events.""" + if self.check_table_exists("SigninLogs"): + nb_data_wait("SigninLogs") + result.aad_signins = self.query_provider.Azure.list_aad_signins_for_ip( + timespan, ip_address_list=src_ip + ) + _display_df_summary(result.aad_signins, "AAD signins") + + if self.check_table_exists("AzureActivity"): + nb_data_wait("AzureActivity") + result.azure_activity = self.query_provider.Azure.list_azure_activity_for_ip( + timespan, ip_address_list=src_ip + ) + _display_df_summary(result.azure_activity, "Azure Activity") + + @set_text(docs=_CELL_DOCS, key="get_office_activity") + def _get_office_activity(self, src_ip, result, timespan): + """Retrieve Office activity data.""" + if self.check_table_exists("OfficeActivity"): + nb_data_wait("OfficeActivity") + summarize = "| summarize operations=count() by OfficeWorkload, Operation" + result.office_activity = self.query_provider.Office365.list_activity_for_ip( + timespan, ip_address_list=src_ip, add_query_items=summarize + ) + 
_display_df_summary(result.office_activity, "Office 365 operations") + if df_has_data(result.office_activity): + nb_display(result.office_activity) + + @set_text(docs=_CELL_DOCS, key="get_related_alerts") + def _get_related_alerts(self, src_ip, result, timespan): + """Get any related alerts for `src_ip`.""" + nb_data_wait("RelatedAlerts") + result.related_alerts = self.query_provider.SecurityAlert.list_alerts_for_ip( + timespan, source_ip_list=src_ip + ) + _display_df_summary(result.related_alerts, "related alerts") + if df_has_data(result.related_alerts): + nb_markdown( + "Use `browse_alerts` and `display_alert_timeline` to view these." + ) + + @set_text(docs=_CELL_DOCS, key="get_related_alerts") + def _get_related_bookmarks( + self, src_ip, result, timespan: TimeSpan + ) -> pd.DataFrame: + nb_data_wait("Bookmarks") + result.related_bookmarks = self.query_provider.AzureSentinel.list_bookmarks_for_entity( + timespan, entity_id=src_ip + ) + _display_df_summary(result.related_bookmarks, "related bookmarks") + + +# %% +# Helper functions +def _display_df_summary(data, description): + if df_has_data(data): + nb_markdown(f"{len(data)} {description}.") + else: + nb_markdown(f"No events from {description} found.") + + +def _determine_ip_origin(result): + return ( + "Internal" + if ( + result.ip_type == "Private" + or df_has_data(result.heartbeat) + or df_has_data(result.az_network_if) + ) + else "External" + ) + + +# %% +# Get Azure network flows +def _get_az_netflows(qry_prov, src_ip, result, timespan): + nb_data_wait("AzureNetworkAnalytics flows") + result.az_network_flows = qry_prov.Network.list_azure_network_flows_by_ip( + timespan, ip_address_list=src_ip + ) + if df_has_data(result.az_network_flows): + result.az_network_flows["TotalAllowedFlows"] = ( + result.az_network_flows["AllowedOutFlows"] + + result.az_network_flows["AllowedInFlows"] + ) + result.az_network_flows_timeline = _plot_netflow_by_protocol(result) + + +def _plot_netflow_by_protocol(result): + return 
result.az_network_flows.mp_timeline.plot( + group_by="L7Protocol", + title="Network Flows by Protocol", + time_column="FlowStartTime", + source_columns=["FlowType", "AllExtIPs", "L7Protocol", "FlowDirection"], + height=300, + legend="right", + yaxis=True, + ) + + +def _plot_netflow_values_by_protocol(result): + return result.az_network_flows.mp_timeline.plot_values( + group_by="L7Protocol", + source_columns=[ + "FlowType", + "AllExtIPs", + "L7Protocol", + "FlowDirection", + "TotalAllowedFlows", + ], + time_column="FlowStartTime", + title="Network flows by Layer 7 Protocol", + y="TotalAllowedFlows", + legend="right", + height=500, + kind=["vbar", "circle"], + ) + + +def _plot_netflow_by_direction(result): + return result.az_network_flows.mp_timeline.plot( + group_by="FlowDirection", + title="Network Flows by Direction", + time_column="FlowStartTime", + source_columns=["FlowType", "AllExtIPs", "L7Protocol", "FlowDirection"], + height=300, + legend="right", + yaxis=True, + ) + + +@set_text(docs=_CELL_DOCS, key="netflow_summary") +def _summarize_netflows(data): + # pylint: disable=unnecessary-lambda + return ( + data[ + [ + "AllExtIPs", + "L7Protocol", + "FlowDirection", + "TotalAllowedFlows", + "FlowStartTime", + ] + ] + .groupby(["L7Protocol", "FlowDirection"]) + .agg( + ExtIPs=pd.NamedAgg(column="AllExtIPs", aggfunc=lambda x: ", ".join(x)), + ExtIPCount=pd.NamedAgg(column="AllExtIPs", aggfunc="count"), + FirstFlow=pd.NamedAgg(column="FlowStartTime", aggfunc="min"), + LastFlow=pd.NamedAgg(column="FlowStartTime", aggfunc="max"), + ) + ) + # pylint: enable=unnecessary-lambda + + +# %% +# Azure activity +def _summarize_azure_activity(result): + az_dfs = [] + if df_has_data(result.aad_signins): + az_dfs.append( + result.aad_signins.assign( + UserPrincipalName=lambda x: x.UserPrincipalName.str.lower() + ).rename( + columns={ + "OperationName": "Operation", + "AppDisplayName": "AppResourceProvider", + } + ) + ) + + if df_has_data(result.azure_activity): + az_dfs.append( + 
result.azure_activity.assign( + UserPrincipalName=lambda x: x.Caller.str.lower() + ).rename( + columns={ + "CallerIpAddress": "IPAddress", + "OperationName": "Operation", + "ResourceProvider": "AppResourceProvider", + "Category": "UserType", + } + ) + ) + + if not az_dfs: + return + + az_all_data = pd.concat(az_dfs) + result.azure_activity_summary = az_all_data.groupby( + ["UserPrincipalName", "Type", "IPAddress", "AppResourceProvider", "UserType"] + ).agg( + OperationCount=pd.NamedAgg(column="Type", aggfunc="count"), + OperationTypes=pd.NamedAgg( + column="Operation", aggfunc=lambda x: x.unique().tolist() + ), + Resources=pd.NamedAgg(column="ResourceId", aggfunc="nunique"), + FirstOperation=pd.NamedAgg(column="TimeGenerated", aggfunc="min"), + LastOperation=pd.NamedAgg(column="TimeGenerated", aggfunc="max"), + ) + nb_display(result.azure_activity_summary) + + +# %% +# Azure heartbeat, interface and VMComputer data +@set_text(docs=_CELL_DOCS, key="get_az_net_if") +def _get_az_net_if(qry_prov, src_ip, result): + """Get the AzureNetwork topology record for `src_ip`.""" + nb_data_wait("AzureNetworkAnalytics topology") + # Try to find the interface topology log entry + result.az_network_if = qry_prov.Network.get_host_for_ip( # type:ignore + ip_address=src_ip + ) + if not df_has_data(result.az_network_if): + nb_markdown("Could not get Azure network interface record") + + +@set_text(docs=_CELL_DOCS, key="get_heartbeat") +def _get_heartbeat(qry_prov, src_ip, result): + """Get the Heartbeat record for `src_ip`.""" + nb_data_wait("Heartbeat") + if result.ip_type == "Public": + result.heartbeat = qry_prov.Network.get_heartbeat_for_ip(ip_address=src_ip) + elif result.host_entity.HostName and result.host_entity.HostName != "unknown": + result.heartbeat = qry_prov.Network.get_heartbeat_for_host( + host_name=result.host_entity.HostName + ) + if not df_has_data(result.heartbeat): + nb_markdown("Could not get Azure Heartbeat record") + + +@set_text(docs=_CELL_DOCS, 
key="get_vmcomputer") +def _get_vmcomputer(qry_prov, src_ip, result): + """Get the VMComputer record for `src_ip`.""" + nb_data_wait("VMComputer") + result.vmcomputer = qry_prov.Azure.get_vmcomputer_for_ip( # type:ignore + ip_address=src_ip + ) + if not df_has_data(result.vmcomputer): + nb_markdown("Could not get VMComputer record") + + +def _populate_host_entity(result): + """Populate host entity and IP address details.""" + result.host_entity = populate_host_entity( + heartbeat_df=result.heartbeat, + az_net_df=result.az_network_if, + vmcomputer_df=result.vmcomputer, + host_entity=result.host_entity, + ) + + +# %% +# Public IP functions +def _get_whois(src_ip, result): + """Get WhoIs data and split out networks.""" + _, whois_dict = get_whois_info(src_ip) + result.whois = pd.DataFrame(whois_dict) + result.whois_nets = pd.DataFrame(whois_dict.get("nets", [])) + if df_has_data(result.whois): + nb_markdown("Whois data retrieved") + + +def _get_geoip_data(geo_lookup, src_ip, result): + if result.ip_entity: + geo_list, ip_list = geo_lookup.lookup_ip(ip_entity=result.ip_entity) + else: + geo_list, ip_list = geo_lookup.lookup_ip(src_ip) + result.geoip = geo_list[0] if geo_list else None + if isinstance(result.geoip, str): + try: + result.geoip = json.loads(result.geoip) + except JSONDecodeError: + pass + result.ip_entity = ip_list[0] if ip_list else None + if result.ip_entity and hasattr(result.ip_entity, "Location"): + result.location = result.ip_entity.Location + nb_markdown("GeoLocation data retrieved") + nb_display(result.ip_entity.Location) + + +def _get_ti_data(ti_lookup, src_ip, result): + nb_data_wait("Threat Intel") + if not ti_lookup: + return + ti_results = ti_lookup.lookup_ioc(observable=src_ip) + result.ti_results = ti_lookup.result_to_df(ti_results) + warn_ti_res = len(result.ti_results.query("Severity != 'information'")) + if warn_ti_res: + nb_markdown(f"{warn_ti_res} TI result(s) of severity 'warning' or above found.") + nb_display(result.ti_results) + 
nb_markdown("Use `browse_ti_results()` to view details.") + + +def _get_passv_dns(ti_lookup, src_ip, result): + nb_data_wait("Passive DNS") + if not ti_lookup: + return + passv_dns = ti_lookup.lookup_ioc( + observable=src_ip, + ioc_type="ipv4", + ioc_query_type="passivedns", + providers=["XForce"], + ) + result.passive_dns = ti_lookup.result_to_df(passv_dns) + if result.passive_dns is not None and not result.passive_dns.empty: + nb_markdown(f"{len(result.passive_dns)} Passive DNS results found.") diff --git a/msticnb/nb/azsent/network/ip_summary.yaml b/msticnb/nb/azsent/network/ip_summary.yaml new file mode 100644 index 0000000..7082ad1 --- /dev/null +++ b/msticnb/nb/azsent/network/ip_summary.yaml @@ -0,0 +1,66 @@ +metadata: + name: IpAddressSummary + description: IP Address Summary notebooklet + default_options: + - geoip: Get geo location information for IP address. + - alerts: Get any alerts listing the IP address. + - heartbeat: Get the latest heartbeat record for this IP Address. + - az_net_if: Get the latest Azure network analytics interface data for this IP Address. + - vmcomputer: Get the latest VMComputer record for this IP Address. + other_options: + - bookmarks: Get any hunting bookmarks listing the IP address. + - az_netflow: Get netflow information from AzureNetworkAnalytics table. + - passive_dns: Force fetching passive DNS data from a TI Provider even if IP is internal. + - az_activity: AAD sign-ins and Azure Activity logs + - office_365: Office 365 activity + - ti: Force get threat intelligence reports even for internal public IPs. + keywords: + - ip + - IPAddress + - network + entity_types: + - ip_address + req_providers: + - AzureSentinel|LocalData + - tilookup + - geolitelookup|ipstacklookup +output: + run: + title: IP Address summary + hd_level: 1 + text: + Retrieving data for IP Address + Data and plots are stored in the result class returned by this function. 
+ md: True + get_az_netflow: + title: Azure network analytics netflow data for IP. + text: + This is a list of netflow events for the IP. + Timeline by protocol is available in the `result.az_network_flows_timeline` + property + - Use `nblt.netflow_total_by_protocol()` method to view flow totals + by protocol + - Use `nblt.netflow_total_by_direction()` to view a timeline grouped + by direction of flow + md: True + get_related_alerts: + title: Azure Sentinel alerts related to the IP. + text: Use `nblt.browse_alerts()` to retrieve a list of alerts. + get_heartbeat: + title: Azure Sentinel heartbeat record for the IP. + text: (only available for IP addresses that belong + to the subscription) + get_vmcomputer: + title: Azure VMComputer record for the IP. + text: (only available for Azure VMs) + get_az_net_if: + title: Azure Network Analytics Topology record for the IP. + text: (only available for Azure VMs) + get_az_activity: + title: Azure Sign-ins and audit activity from IP Address + get_office_activity: + title: Office 365 operations summary from IP Address + get_public_ip_data: + title: Public IP data (GeoIP, ThreatIntel, Passive DNS, VPS membership) + netflow_summary: + title: Summary of network flow data for this IP Address diff --git a/msticnb/nb/azsent/network/network_flow_summary.py b/msticnb/nb/azsent/network/network_flow_summary.py index 3487c4b..9a3eb3e 100644 --- a/msticnb/nb/azsent/network/network_flow_summary.py +++ b/msticnb/nb/azsent/network/network_flow_summary.py @@ -8,6 +8,7 @@ from itertools import chain from typing import Any, Dict, Iterable, Optional, Tuple +import numpy as np import pandas as pd from bokeh.plotting.figure import Figure from IPython.display import display @@ -25,6 +26,7 @@ nb_markdown, nb_warn, set_text, + df_has_data, ) from ....data_providers import DataProviders from ....nblib.azsent.host import get_aznet_topology, get_heartbeat @@ -225,15 +227,17 @@ def run( ) result.description = ( - "Network flow summary for " + 
host_name or host_ip # type: ignore - ) + f"Network flow summary for {host_name or host_ip or 'unknown'}" + ) # type: ignore flow_df = _get_az_net_flows( self.query_provider, self.timespan, host_ip, host_name ) result.network_flows = flow_df - if "resolve_host" in self.options: + if "resolve_host" in self.options or not hasattr( + result.host_entity, "IpAddress" + ): result.host_entity = _get_host_details( qry_prov=self.query_provider, host_entity=result.host_entity ) @@ -307,7 +311,7 @@ def lookup_ti_for_asn_ips(self): flow_sum_df=self._last_result.flow_summary, select_asn=self.asn_selector ) ti_results = _lookup_ip_ti( - flows_df=self._last_result.flow_summary, + flows_df=self._last_result.flow_index_data, selected_ips=selected_ips, ti_lookup=self.data_providers["tilookup"], ) @@ -336,7 +340,7 @@ def show_selected_asn_map(self) -> foliummap.FoliumMap: ) return None geo_map = _display_geo_map( - flow_index=self._last_result.flow_index_data, + flow_index=self._last_result.flow_summary, ip_locator=self.data_providers["geolitelookup"], host_entity=self._last_result.host_entity, ti_results=self._last_result.ti_results, @@ -550,13 +554,15 @@ def _get_flow_summary(flow_index): # %% # ASN Selection def _get_source_host_asns(host_entity): - host_ips = getattr(host_entity, "public_ips", []) + host_ips = getattr(host_entity, "PublicIPAddresses", []) host_ips.append(getattr(host_entity, "IpAddress", None)) host_asns = [] - for host_ip in host_ips: - if get_ip_type(host_ip) == "Public": - host_ip.ASNDescription, host_ip.ASNDetails = get_whois_info(host_ip) - host_asns.append(host_ip.ASNDescription) + for ip_entity in host_ips: + if get_ip_type(ip_entity.Address) == "Public": + ip_entity.ASNDescription, ip_entity.ASNDetails = get_whois_info( + ip_entity.Address + ) + host_asns.append(ip_entity.ASNDescription) return host_asns @@ -634,7 +640,8 @@ def ti_check_ser_sev(severity, threshold): def _format_ip_entity(ip_loc, row, ip_col): ip_entity = 
entities.IpAddress(Address=row[ip_col]) ip_loc.lookup_ip(ip_entity=ip_entity) - ip_entity.AdditionalData["protocol"] = row.L7Protocol + if "L7Protocol" in row: + ip_entity.AdditionalData["protocol"] = row.L7Protocol if "severity" in row: ip_entity.AdditionalData["threat severity"] = row["severity"] if "Details" in row: @@ -674,9 +681,14 @@ def _display_geo_map_all(flow_index, ip_locator, host_entity): ) icon_props = {"color": "green"} - for ips in host_entity.public_ips: - ips.AdditionalData["host"] = host_entity.HostName - folium_map.add_ip_cluster(ip_entities=host_entity.public_ips, **icon_props) + host_ips = getattr(host_entity, "PublicIPAddresses", []) + host_ip = getattr(host_entity, "IpAddress", None) + if host_ip: + host_ips.append(host_ip) + if host_ips: + for ips in host_ips: + ips.AdditionalData["host"] = host_entity.HostName or "unknown hostname" + folium_map.add_ip_cluster(ip_entities=host_ips, **icon_props) icon_props = {"color": "blue"} folium_map.add_ip_cluster(ip_entities=ips_out, **icon_props) icon_props = {"color": "purple"} @@ -696,34 +708,41 @@ def _display_geo_map(flow_index, ip_locator, host_entity, ti_results, select_asn # Get the flow records for all flows not in the TI results selected_out = flow_index[flow_index["DestASN"].isin(select_asn.selected_items)] selected_in = flow_index[flow_index["SourceASN"].isin(select_asn.selected_items)] - if ti_results is not None and not ti_results.empty: - selected_out = selected_out[~selected_out["dest"].isin(ti_results["Ioc"])] - selected_in = selected_in[~selected_in["source"].isin(ti_results["Ioc"])] + sel_out_exp = _list_to_rows(selected_out, "dest_ips") + sel_in_exp = _list_to_rows(selected_in, "source_ips") + sel_out_exp = sel_out_exp[~sel_out_exp["dest_ips"].isin(ti_results["Ioc"])] + sel_in_exp = sel_in_exp[~sel_in_exp["source_ips"].isin(ti_results["Ioc"])] - if selected_out.empty: + if sel_out_exp.empty: ips_out = [] else: + nb_data_wait("IP Geolocation") ips_out = list( - selected_out.apply( - 
lambda x: _format_ip_entity(ip_locator, x, "dest"), axis=1 + sel_out_exp.apply( + lambda x: _format_ip_entity(ip_locator, x, "dest_ips"), axis=1 ) ) - if selected_in.empty: + if sel_in_exp.empty: ips_in = [] else: nb_data_wait("IP Geolocation") ips_in = list( - selected_in.apply( - lambda x: _format_ip_entity(ip_locator, x, "source"), axis=1 + sel_in_exp.apply( + lambda x: _format_ip_entity(ip_locator, x, "source_ips"), axis=1 ) ) icon_props = {"color": "green"} - for ip_addr in host_entity.public_ips: - ip_addr.AdditionalData["host"] = host_entity.HostName - folium_map.add_ip_cluster(ip_entities=host_entity.public_ips, **icon_props) + host_ips = getattr(host_entity, "PublicIPAddresses", []) + host_ip = getattr(host_entity, "IpAddress", None) + if host_ip: + host_ips.append(host_ip) + if host_ips: + for ip_addr in host_ips: + ip_addr.AdditionalData["host"] = host_entity.HostName or "unknown hostname" + folium_map.add_ip_cluster(ip_entities=host_ips, **icon_props) icon_props = {"color": "blue"} folium_map.add_ip_cluster(ip_entities=ips_out, **icon_props) icon_props = {"color": "purple"} @@ -737,3 +756,18 @@ def _display_geo_map(flow_index, ip_locator, host_entity, ti_results, select_asn folium_map.center_map() return folium_map + + +def _list_to_rows(data, col): + orig_cols = data.columns + item_col = f"{col}_list_item$$" + ren_col = {item_col: col} + return ( + pd.DataFrame(data[col].to_list()) + .replace([None], np.nan) # convert any Nones to NaN + .merge(data, right_index=True, left_index=True) + .melt(id_vars=orig_cols, value_name=item_col) + .dropna(subset=[item_col]) # get rid of rows with NaNs in this col + .drop([col, "variable"], axis=1) + .rename(columns=ren_col) + ) diff --git a/msticnb/nb/azsent/network/network_flow_summary.yaml b/msticnb/nb/azsent/network/network_flow_summary.yaml index 35ca400..9573359 100644 --- a/msticnb/nb/azsent/network/network_flow_summary.yaml +++ b/msticnb/nb/azsent/network/network_flow_summary.yaml @@ -7,8 +7,8 @@ metadata: - 
flow_summary: Create a summarization of all flows and all flows grouped by ASN. - other_options: - resolve_host: Try to resolve the host name before other operations. + other_options: - geo_map: Plot a map of all IP address locations in communication with the host (see the method below for plotting selected IPs only). diff --git a/msticnb/nb_pivot.py b/msticnb/nb_pivot.py index d785d81..2b96146 100644 --- a/msticnb/nb_pivot.py +++ b/msticnb/nb_pivot.py @@ -22,6 +22,7 @@ "host": {"Host": "HostName"}, "account": {"Account": "Name"}, "ip_address": {"IpAddress": "Address"}, + "alert": {"Alert": "AlertType"}, } @@ -75,9 +76,6 @@ def _wrap_run_func(func: Callable[[Any], Any], get_time_span: Callable[[], TimeS def _wrapped_func(*args, **kwargs): time_span = get_time_span() kwargs.update({"timespan": time_span}) - result = func(*args, **kwargs) - if isinstance(result, list) and len(list) == 1: - return result[0] - return result + return func(*args, **kwargs) return _wrapped_func diff --git a/msticnb/nblib/azsent/alert.py b/msticnb/nblib/azsent/alert.py new file mode 100644 index 0000000..2ca5bee --- /dev/null +++ b/msticnb/nblib/azsent/alert.py @@ -0,0 +1,26 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+# -------------------------------------------------------------------------- +"""Alert utility functions.""" +from msticpy.nbtools import nbdisplay, nbwidgets +from ..._version import VERSION + +__version__ = VERSION +__author__ = "Ian Hellen" + + +def browse_alerts(nb_result, alert_attr="related_alerts") -> nbwidgets.SelectAlert: + """Return alert browser/viewer.""" + if nb_result is None or not hasattr(nb_result, alert_attr): + return None + rel_alerts = getattr(nb_result, alert_attr, None) + if rel_alerts is None or rel_alerts.empty: + return None + + if "CompromisedEntity" not in rel_alerts: + rel_alerts["CompromisedEntity"] = "n/a" + if "StartTimeUtc" not in rel_alerts: + rel_alerts["StartTimeUtc"] = rel_alerts["TimeGenerated"] + return nbwidgets.SelectAlert(alerts=rel_alerts, action=nbdisplay.format_alert) diff --git a/msticnb/nblib/azsent/host.py b/msticnb/nblib/azsent/host.py index 9441395..bdc649e 100644 --- a/msticnb/nblib/azsent/host.py +++ b/msticnb/nblib/azsent/host.py @@ -6,7 +6,7 @@ """host_network_summary notebooklet.""" from collections import namedtuple from functools import lru_cache -from typing import Dict +from typing import Dict, List, Set import pandas as pd from msticpy.common.timespan import TimeSpan @@ -15,7 +15,7 @@ from msticpy.sectools.ip_utils import convert_to_ip_entities from ..._version import VERSION -from ...common import MsticnbMissingParameterError, nb_data_wait, nb_print +from ...common import MsticnbMissingParameterError, nb_data_wait, nb_print, df_has_data __version__ = VERSION __author__ = "Ian Hellen" @@ -51,7 +51,7 @@ def get_heartbeat( host_hb_df = qry_prov.Network.get_heartbeat_for_host(host_name=host_name) elif host_ip: host_hb_df = qry_prov.Network.get_heartbeat_for_ip(ip_address=host_ip) - if host_hb_df is not None and not host_hb_df.empty: + if df_has_data(host_hb_df): host_entity = populate_host_entity(heartbeat_df=host_hb_df) return host_entity @@ -91,21 +91,9 @@ def get_aznet_topology( elif host_ip: az_net_df = 
qry_prov.Network.host_for_ip(ip_address=host_ip) - if az_net_df is None: - return - if not az_net_df.empty: - host_entity.private_ips = convert_to_ip_entities( - az_net_df["PrivateIPAddresses"].iloc[0] - ) - host_entity.public_ips = convert_to_ip_entities( - az_net_df["PublicIPAddresses"].iloc[0] - ) - - else: - if "private_ips" not in host_entity: - host_entity.private_ips = [] - if "public_ips" not in host_entity: - host_entity.public_ips = [] + if df_has_data(az_net_df): + host_entity = populate_host_entity(az_net_df=az_net_df, host_entity=host_entity) + return host_entity HostNameVerif = namedtuple("HostNameVerif", "host_name, host_type, host_names") @@ -202,8 +190,9 @@ def verify_host_name( # %% # Populate or create a host entity from Heartbeat and Azure Topology information def populate_host_entity( - heartbeat_df: pd.DataFrame, + heartbeat_df: pd.DataFrame = None, az_net_df: pd.DataFrame = None, + vmcomputer_df: pd.DataFrame = None, host_entity: entities.Host = None, ) -> entities.Host: """ @@ -228,16 +217,67 @@ def populate_host_entity( if host_entity is None: host_entity = entities.Host() + ip_entities: List[entities.IpAddress] = [] + ip_unique: Set[str] = set() # Extract data from available dataframes - ip_hb = heartbeat_df.iloc[0] + if df_has_data(heartbeat_df): + ip_hb = heartbeat_df.iloc[0] # type: ignore + ip_entity = _extract_heartbeat(ip_hb, host_entity) + ip_entities.append(ip_entity) + ip_unique.add(ip_entity.Address) + + if df_has_data(vmcomputer_df): + ip_vm = vmcomputer_df.iloc[0] # type: ignore + _extract_vmcomputer(ip_vm, host_entity) + ip_ents = convert_to_ip_entities(data=vmcomputer_df, ip_col="Ipv4Addresses") + ip_entities.extend( + ip_ent for ip_ent in ip_ents if ip_ent.Address not in ip_unique + ) + ip_unique |= {ip_ent.Address for ip_ent in ip_ents} + ip_ents = convert_to_ip_entities(data=vmcomputer_df, ip_col="Ipv6Addresses") + ip_entities.extend( + ip_ent for ip_ent in ip_ents if ip_ent.Address not in ip_unique + ) + ip_unique |= 
{ip_ent.Address for ip_ent in ip_ents} + + # If Azure network data present add this to host record + if df_has_data(az_net_df): + if not host_entity.HostName: + host_entity.HostName = az_net_df.iloc[0].Computer # type: ignore + ip_priv = convert_to_ip_entities(data=az_net_df, ip_col="PrivateIPAddresses") + ip_pub = convert_to_ip_entities(data=az_net_df, ip_col="PublicIPAddresses") + host_entity["PrivateIPAddresses"] = [] + host_entity["PrivateIPAddresses"].extend( + ip_ent for ip_ent in ip_priv if ip_ent.Address not in ip_unique + ) + host_entity["PublicIPAddresses"] = [] + host_entity["PublicIPAddresses"].extend( + ip_ent for ip_ent in ip_pub if ip_ent.Address not in ip_unique + ) + ip_entities.extend(host_entity["PrivateIPAddresses"]) + ip_entities.extend(host_entity["PublicIPAddresses"]) + + host_entity["IPAddresses"] = ip_entities + if not hasattr(host_entity, "IpAddress") and len(ip_entities) == 1: + host_entity["IPAddress"] = ip_entities[0] + + return host_entity + + +def _extract_heartbeat(ip_hb, host_entity): if not host_entity.HostName: host_entity.HostName = ip_hb["Computer"] # type: ignore host_entity.SourceComputerId = ip_hb["SourceComputerId"] # type: ignore - host_entity.OSType = ip_hb["OSType"] # type: ignore + host_entity.OSFamily = ( + entities.OSFamily.Windows + if ip_hb["OSType"] == "Windows" + else entities.OSFamily.Linux + ) host_entity.OSName = ip_hb["OSName"] # type: ignore host_entity.OSVMajorVersion = ip_hb["OSMajorVersion"] # type: ignore host_entity.OSVMinorVersion = ip_hb["OSMinorVersion"] # type: ignore host_entity.Environment = ip_hb["ComputerEnvironment"] # type: ignore + host_entity.AgentId = ip_hb["SourceComputerId"] host_entity.OmsSolutions = [ # type: ignore sol.strip() for sol in ip_hb["Solutions"].split(",") ] @@ -249,7 +289,6 @@ def populate_host_entity( "ResourceType": ip_hb["ResourceType"], "ResourceGroup": ip_hb["ResourceGroup"], "ResourceId": ip_hb["ResourceId"], - "Solutions": ip_hb["Solutions"], } # Populate IP data @@ 
-259,19 +298,27 @@ def populate_host_entity( geoloc_entity.Longitude = ip_hb["RemoteIPLongitude"] # type: ignore geoloc_entity.Latitude = ip_hb["RemoteIPLatitude"] # type: ignore ip_entity.Location = geoloc_entity # type: ignore - host_entity.IPAddress = ip_entity # type: ignore + host_entity.IpAddress = ip_entity # type: ignore + return ip_entity - # If Azure network data present add this to host record - if az_net_df is not None and not az_net_df.empty: - if len(az_net_df) == 1: - priv_addr_str = az_net_df["PrivateIPAddresses"].loc[0] - ip_entity["private_ips"] = convert_to_ip_entities(priv_addr_str) - pub_addr_str = az_net_df["PublicIPAddresses"].loc[0] - ip_entity["public_ips"] = convert_to_ip_entities(pub_addr_str) - else: - if "private_ips" not in ip_entity: - host_entity["private_ips"] = [] - if "public_ips" not in ip_entity: - host_entity["public_ips"] = [] - return host_entity +def _extract_vmcomputer(ip_vm, host_entity): + if not host_entity.HostName: + host_entity.HostName = ip_vm["Computer"] # type: ignore + host_entity.OSFamily = ( + entities.OSFamily.Windows + if ip_vm["OperatingSystemFamily"].casefold() == "windows" + else entities.OSFamily.Linux + ) + host_entity.OSName = ip_vm["OperatingSystemFullName"] # type: ignore + host_entity.Environment = "Azure" # type: ignore + host_entity.AgentId = ip_vm["AgentId"] + host_entity.VMUUID = ip_vm["AzureVmId"] # type: ignore + if host_entity.Environment == "Azure": + host_entity.AzureDetails = { # type: ignore + "SubscriptionId": ip_vm["AzureSubscriptionId"], + "ResourceProvider": ip_vm["HostingProvider"], + "ResourceType": ip_vm["VirtualMachineType"], + "ResourceGroup": ip_vm["AzureResourceGroup"], + "ResourceId": ip_vm["_ResourceId"], + } diff --git a/msticnb/nblib/entity_tools.py b/msticnb/nblib/entity_tools.py new file mode 100644 index 0000000..1544e42 --- /dev/null +++ b/msticnb/nblib/entity_tools.py @@ -0,0 +1,47 @@ +# ------------------------------------------------------------------------- +# 
Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- +"""Entity Helper functions.""" +from typing import Dict, List, Union + +import pandas as pd + +from .._version import VERSION + +__version__ = VERSION +__author__ = "Ian Hellen" + + +def extract_entities( + data: pd.DataFrame, cols: Union[str, List[str]] +) -> Dict[str, List[str]]: + """ + Extract items from a column (strings or lists). + + Parameters + ---------- + data : pd.DataFrame + DataFrame to parse + cols : Union[str, List[str]] + Columns to use for input + + Returns + ------- + Dict[str, List[str]] + Dictionary of (column: result_list) + + """ + if not isinstance(cols, list): + cols = [cols] + + val_results = {} + for col in cols: + ent_vals = list(data[col].values) + test_val = data[col].iloc[0] + if isinstance(test_val, list): + ent_vals = list({ent for ent_list in ent_vals for ent in ent_list}) + val_results[col] = ent_vals + + return val_results diff --git a/msticnb/nblib/iptools.py b/msticnb/nblib/iptools.py index a3ecc6f..741c204 100644 --- a/msticnb/nblib/iptools.py +++ b/msticnb/nblib/iptools.py @@ -4,18 +4,27 @@ # license information. 
# -------------------------------------------------------------------------- """IP Helper functions.""" -import pandas as pd +import re +from collections import defaultdict +from ipaddress import AddressValueError, IPv4Address, IPv4Network, ip_address +from typing import Optional +import pandas as pd +import requests from msticpy.sectools.ip_utils import get_whois_df -from ..common import nb_markdown from .._version import VERSION +from ..common import nb_markdown __version__ = VERSION __author__ = "Ian Hellen" -def get_ip_ti(ti_lookup: "TILookup", data: pd.DataFrame, ip_col: str) -> pd.DataFrame: +def get_ip_ti( + ti_lookup: "TILookup", # type: ignore + data: pd.DataFrame, + ip_col: str, +) -> pd.DataFrame: """ Lookup Threat Intel for IPAddress. @@ -53,7 +62,9 @@ def _normalize_ip4(data, ip_col): ) -def get_geoip_whois(geo_lookup: "GeoIpLookup", data: pd.DataFrame, ip_col: str): +def get_geoip_whois( + geo_lookup: "GeoIpLookup", data: pd.DataFrame, ip_col: str +): # type: ignore """ Get GeoIP and WhoIs data for IPs. 
@@ -81,3 +92,63 @@ def get_geoip_whois(geo_lookup: "GeoIpLookup", data: pd.DataFrame, ip_col: str): nb_markdown(f"Querying WhoIs for {len(data)} ip addresses...") # Get the WhoIs results return get_whois_df(geo_df, "IpAddress", whois_col="Whois_data") + + +_VPS_URL = "https://raw.githubusercontent.com/Azure/Azure-Sentinel/master/Sample%20Data/Feeds/VPS_Networks.csv" +_NET_DICT = defaultdict(list) + + +def _build_vps_dict(): + resp = requests.get(_VPS_URL) + + # get rid of unicode bytes + net_list = re.sub(r"[^\d./\n]", "", resp.text).split("\n") + + # Build network dict - keyed by 16 bit prefix + for net in net_list: + pref, ip4_net = _to_ip4_net(net) + if pref: + _NET_DICT[pref].append(ip4_net) + return _NET_DICT + + +def _get_prefix(ip_addr): + return ".".join(ip_addr.split(".", maxsplit=2)[:2]) + + +def _to_ip4_net(net): + try: + return _get_prefix(net), IPv4Network(net) + except AddressValueError as err: + print(err, type(err)) + return None, None + + +def is_in_vps_net(ip_addr: str) -> Optional[IPv4Network]: + """ + Return IpV4 Network if `ip_addr` is in a found VPS network. + + Parameters + ---------- + ip_addr : str + IP Address + + Returns + ------- + Optional[IPv4Network] + IpV4 network if `ip_addr` is a member, else None + + """ + if not _NET_DICT: + print("Please wait. 
Getting VPS data...", end="") + _build_vps_dict() + print("done") + ip_pref = _get_prefix(ip_addr) + ip4_addr = ip_address(ip_addr) + if not isinstance(ip4_addr, IPv4Address): + return None + if ip_pref in _NET_DICT: + for net in _NET_DICT[ip_pref]: + if ip_addr in net: + return net + return None diff --git a/msticnb/notebooklet.py b/msticnb/notebooklet.py index 9377502..380ff68 100644 --- a/msticnb/notebooklet.py +++ b/msticnb/notebooklet.py @@ -11,158 +11,23 @@ from functools import wraps from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple -import bokeh.io import pandas as pd -from bokeh.models import LayoutDOM -from bokeh.plotting.figure import Figure from IPython.core.getipython import get_ipython from IPython.display import HTML, display -from tqdm import tqdm +from tqdm.auto import tqdm from msticpy.common.timespan import TimeSpan from ._version import VERSION from .common import MsticnbDataProviderError, MsticnbError from .data_providers import DataProviders from .nb_metadata import NBMetadata, read_mod_metadata +from .notebooklet_result import NotebookletResult from .options import get_opt, set_opt __version__ = VERSION __author__ = "Ian Hellen" -# pylint: disable=too-few-public-methods -class NotebookletResult: - """Base result class.""" - - def __init__( - self, - description: Optional[str] = None, - timespan: Optional[TimeSpan] = None, - notebooklet: Optional["Notebooklet"] = None, - ): - """ - Create new Notebooklet result instance. 
- - Parameters - ---------- - description : Optional[str], optional - Result description, by default None - timespan : Optional[TimeSpan], optional - TimeSpan for the results, by default None - notebooklet : Optional[, optional - Originating notebooklet, by default None - """ - self.description = description or self.__class__.__qualname__ - self.timespan = timespan - self.notebooklet = notebooklet - self._attribute_desc: Dict[str, Tuple[str, str]] = {} - - # Populate the `_attribute_desc` dictionary on init. - self._populate_attr_desc() - - def __str__(self): - """Return string representation of object.""" - return "\n".join( - f"{name}: {self._str_repr(val)}" - for name, val in self.__dict__.items() - if not name.startswith("_") - ) - - @staticmethod - def _str_repr(obj): - if isinstance(obj, pd.DataFrame): - return f"DataFrame: {len(obj)} rows" - if isinstance(obj, LayoutDOM): - return "Bokeh plot" - return str(obj) - - # pylint: disable=unsubscriptable-object, no-member - def _repr_html_(self): - """Display HTML represention for notebook.""" - attrib_lines = [] - for name, val in self.__dict__.items(): - if name.startswith("_"): - continue - attr_desc = "" - attr_type, attr_text = self._attribute_desc.get( - name, (None, None) - ) # type: ignore - if attr_type: - attr_desc += f"[{attr_type}]
" - if attr_text: - attr_desc += f"{attr_text}
" - attrib_lines.append(f"

{name}

{attr_desc}{self._html_repr(val)}") - return "
".join(attrib_lines) - - # pylint: enable=unsubscriptable-object, no-member - - # pylint: disable=protected-access - @staticmethod - def _html_repr(obj): - if isinstance(obj, pd.DataFrame): - return obj.head(5)._repr_html_() - if isinstance(obj, (LayoutDOM, Figure)): - bokeh.io.show(obj) - if hasattr(obj, "_repr_html_"): - return obj._repr_html_() - return str(obj).replace("\n", "
").replace(" ", " ") - - # pylint: enable=protected-access - - def _populate_attr_desc(self): - indent = " " * 4 - in_attribs = False - attr_name = None - attr_type = None - attr_dict = {} - attr_lines = [] - doc_str = inspect.cleandoc(self.__doc__) - for line in doc_str.split("\n"): - if line.strip() == "Attributes": - in_attribs = True - continue - if ( - line.strip() == "-" * len("Attributes") - or not in_attribs - or not line.strip() - ): - continue - if not line.startswith(indent): - # if existing attribute, add to dict - if attr_name: - attr_dict[attr_name] = attr_type, " ".join(attr_lines) - attr_name, attr_type = [item.strip() for item in line.split(":")] - attr_lines = [] - else: - attr_lines.append(line.strip()) - attr_dict[attr_name] = attr_type, " ".join(attr_lines) - if "timespan" not in attr_dict: - attr_dict["timespan"] = ( - "TimeSpan", - "Time span for the queried results data.", - ) - # pylint: disable=no-member - self._attribute_desc.update(attr_dict) # type: ignore - # pylint: enable=no-member - - @property - def properties(self): - """Return names of all properties.""" - return [ - name - for name, val in self.__dict__.items() - if val is not None and not name.startswith("_") - ] - - def prop_doc(self, name) -> Tuple[str, str]: - """Get the property documentation for the property.""" - # pylint: disable=unsupported-membership-test, unsubscriptable-object - if name in self._attribute_desc: - return self._attribute_desc[name] - # pylint: enable=unsupported-membership-test, unsubscriptable-object - raise KeyError(f"Unknown property {name}.") - - class Notebooklet(ABC): """Base class for Notebooklets.""" @@ -306,7 +171,7 @@ def run( if sub_options: self.options = list(set(def_options) - sub_options) if add_options: - self.options = list(set(self.options) | add_options) + self.options = list(set(def_options) | add_options) if not (add_options or sub_options): self.options = list(options) self._set_tqdm_notebook(get_opt("verbose")) @@ -603,3 +468,76 
@@ def _get_doc(cls, fmt): """Return documentation func. placeholder.""" del fmt return "No documentation available." + + def check_valid_result_data(self, attrib: str = None) -> bool: + """ + Check that the result is valid and `attrib` contains data. + + Parameters + ---------- + attrib : str + Name of the attribute to check, if None this function + only checks for a valid _last_result. + + Returns + ------- + bool + Returns True if valid data is available, else False. + + """ + if self._last_result is None: + print( + "No current result." + "Please use 'run()' to fetch the data before using this method." + ) + return False + if not attrib: + return True + data_obj = getattr(self._last_result, attrib) + if data_obj is None or isinstance(data_obj, pd.DataFrame) and data_obj.empty: + print(f"No data is available for last_result.{attrib}.") + return False + return True + + def check_table_exists(self, table: str) -> bool: + """ + Check to see if the table exists in the provider. + + Parameters + ---------- + table : str + Table name + + Returns + ------- + bool + True if the table exists, otherwise False. 
+ + """ + if not self.query_provider: + print(f"No query provider for table {table} is available.") + return False + if table not in self.query_provider.schema_tables: + print(f"table {table} is not available.") + return False + return True + + def get_methods(self) -> Dict[str, Callable[[Any], Any]]: + """Return methods available for this class.""" + meths = inspect.getmembers(self, inspect.ismethod) + cls_selector = f"bound method {self.__class__.__name__.rsplit('.')[0]}" + return { + meth[0]: meth[1] + for meth in meths + if cls_selector in str(meth[1]) and not meth[0].startswith("_") + } + + def list_methods(self) -> List[str]: + """Return list of methods with descriptions.""" + methods = self.get_methods() + method_desc: List[str] = [] + for name, method in methods.items(): + f_doc = inspect.getdoc(method) + desc = f_doc.split("\n", maxsplit=1)[0] if f_doc else "" + method_desc.append(f"{name} - '{desc}'") + return method_desc diff --git a/msticnb/notebooklet_result.py b/msticnb/notebooklet_result.py new file mode 100644 index 0000000..23448a0 --- /dev/null +++ b/msticnb/notebooklet_result.py @@ -0,0 +1,173 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+# -------------------------------------------------------------------------- +"""Notebooklet Result base classes.""" +import inspect +from typing import Dict, Optional, Tuple + +import pandas as pd +from bokeh.models import LayoutDOM +from bokeh.plotting.figure import Figure +from msticpy.common.timespan import TimeSpan + +from ._version import VERSION +from .common import show_bokeh + +__version__ = VERSION +__author__ = "Ian Hellen" + + +# pylint: disable=too-few-public-methods +class NotebookletResult: + """Base result class.""" + + _TITLE_STYLE = "background-color:lightgray; padding:5px;" + + def __init__( + self, + description: Optional[str] = None, + timespan: Optional[TimeSpan] = None, + notebooklet: Optional["Notebooklet"] = None, # type: ignore + ): + """ + Create new Notebooklet result instance. + + Parameters + ---------- + description : Optional[str], optional + Result description, by default None + timespan : Optional[TimeSpan], optional + TimeSpan for the results, by default None + notebooklet : Optional[, optional + Originating notebooklet, by default None + """ + self.description = description or self.__class__.__qualname__ + self.timespan = timespan + self.notebooklet = notebooklet + self._attribute_desc: Dict[str, Tuple[str, str]] = {} + + # Populate the `_attribute_desc` dictionary on init. 
+ self._populate_attr_desc() + + def __str__(self): + """Return string representation of object.""" + return "\n".join( + f"{name}: {self._str_repr(val)}" + for name, val in self.__dict__.items() + if not name.startswith("_") and val is not None + ) + + @staticmethod + def _str_repr(obj): + if isinstance(obj, pd.DataFrame): + return f"DataFrame: {len(obj)} rows" + if isinstance(obj, LayoutDOM): + return "Bokeh plot" + return str(obj) + + # pylint: disable=unsubscriptable-object, no-member + def _repr_html_(self): + """Display HTML represention for notebook.""" + attrib_lines = [] + for name, val in self.__dict__.items(): + if name.startswith("_") or val is None: + continue + attr_desc = "" + attr_type, attr_text = self._attribute_desc.get( + name, (None, None) + ) # type: ignore + if attr_text: + attr_desc += f"{attr_text}" + if attr_type: + attr_desc += f" Type: [{attr_type}]" + attrib_lines.extend( + [ + f"

<h3 style='{self._TITLE_STYLE}'>property: {name}</h3>
", + f"{attr_desc}<br>{self._html_repr(val)}<br>
", + ] + ) + return "".join(attrib_lines) + + # pylint: enable=unsubscriptable-object, no-member + + # pylint: disable=protected-access + @staticmethod + def _html_repr(obj): + if isinstance(obj, pd.DataFrame): + return obj.head(5)._repr_html_() + if isinstance(obj, (LayoutDOM, Figure)): + show_bokeh(obj) + if hasattr(obj, "_repr_html_"): + return obj._repr_html_() + return str(obj).replace("\n", "
").replace(" ", " ") + + # pylint: enable=protected-access + + def __getattr__(self, name): + """Proxy attributes of the notebooklet member.""" + if self.notebooklet: + return getattr(self.notebooklet, name) + + def _populate_attr_desc(self): + indent = " " * 4 + in_attribs = False + attr_name = None + attr_type = None + attr_dict = {} + attr_lines = [] + doc_str = inspect.cleandoc(self.__doc__) + for line in doc_str.split("\n"): + if line.strip() == "Attributes": + in_attribs = True + continue + if ( + line.strip() == "-" * len("Attributes") + or not in_attribs + or not line.strip() + ): + continue + if not line.startswith(indent): + # if existing attribute, add to dict + if attr_name: + attr_dict[attr_name] = attr_type, " ".join(attr_lines) + if ":" in line: + attr_name, attr_type = [item.strip() for item in line.split(":")] + else: + attr_name = line.strip() + attr_type = "object" + attr_lines = [] + else: + attr_lines.append(line.strip()) + attr_dict[attr_name] = attr_type, " ".join(attr_lines) + if "timespan" not in attr_dict: + attr_dict["timespan"] = ( + "TimeSpan", + "Time span for the queried results data.", + ) + if "notebooklet" not in attr_dict: + attr_dict["notebooklet"] = ( + "Notebooklet", + "The notebooklet instance that created this result.", + ) + # pylint: disable=no-member + self._attribute_desc.update(attr_dict) # type: ignore + # pylint: enable=no-member + + @property + def properties(self): + """Return names of all properties.""" + return [ + name + for name, val in self.__dict__.items() + if val is not None and not name.startswith("_") + ] + + def prop_doc(self, name) -> Tuple[str, str]: + """Get the property documentation for the property.""" + # pylint: disable=unsupported-membership-test, unsubscriptable-object + if name in self._attribute_desc: + return self._attribute_desc[name] + # pylint: enable=unsupported-membership-test, unsubscriptable-object + raise KeyError(f"Unknown property {name}.") diff --git 
a/tests/nb/azsent/account/test_account_summary.py b/tests/nb/azsent/account/test_account_summary.py index db623ca..a5ac1ee 100644 --- a/tests/nb/azsent/account/test_account_summary.py +++ b/tests/nb/azsent/account/test_account_summary.py @@ -55,7 +55,7 @@ def test_account_summary_notebooklet(): bm_select = test_nb.browse_bookmarks() assert isinstance(bm_select, nbwidgets.SelectItem) - test_nb.find_additional_data() + test_nb.get_additional_data() assert isinstance(result.account_timeline_by_ip, LayoutDOM) if "Windows" in acct_item or "Linux" in acct_item: @@ -73,3 +73,12 @@ def test_account_summary_notebooklet(): assert isinstance(result.azure_activity_summary, pd.DataFrame) assert isinstance(result.azure_timeline_by_provider, LayoutDOM) assert isinstance(result.azure_timeline_by_operation, LayoutDOM) + + result.display_alert_timeline() + result.browse_accounts() + result.browse_alerts() + result.browse_bookmarks() + result.az_activity_timeline_by_provider() + result.az_activity_timeline_by_ip() + result.az_activity_timeline_by_operation() + result.host_logon_timeline() diff --git a/tests/nb/azsent/alert/test_ti_enrich.py b/tests/nb/azsent/alert/test_ti_enrich.py index 5e1eaac..3b45b92 100644 --- a/tests/nb/azsent/alert/test_ti_enrich.py +++ b/tests/nb/azsent/alert/test_ti_enrich.py @@ -10,20 +10,19 @@ import pytest from msticnb import nblts -from msticnb.data_providers import init +from msticnb import data_providers from msticpy.nbtools.nbwidgets import SelectAlert -from ....unit_test_lib import TEST_DATA_PATH +from ....unit_test_lib import TEST_DATA_PATH, GeoIPLiteMock @pytest.fixture -def nbltdata(): +def nbltdata(monkeypatch): """Generate test nblt output.""" test_file = Path.cwd().joinpath(TEST_DATA_PATH).joinpath("alerts_list.pkl") - test_config = str( - Path.cwd().joinpath(TEST_DATA_PATH).joinpath("msticpyconfig-test.yaml") - ) - init("LocalData", providers=["tilookup"]) + + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + 
data_providers.init("LocalData", providers=["tilookup", "geolitelookup"]) test_nblt = nblts.azsent.alert.EnrichAlerts() # pylint: disable=no-member test_df = pd.read_pickle(test_file) test_df["Entities"] = "" diff --git a/tests/nb/azsent/host/test_host_summary.py b/tests/nb/azsent/host/test_host_summary.py index a550bbe..73f5cb6 100644 --- a/tests/nb/azsent/host/test_host_summary.py +++ b/tests/nb/azsent/host/test_host_summary.py @@ -5,10 +5,10 @@ # -------------------------------------------------------------------------- """Test the nb_template class.""" # from contextlib import redirect_stdout -import unittest from pathlib import Path import pandas as pd +import pytest_check as check from msticnb import nblts from msticnb.data_providers import init from msticpy.common.timespan import TimeSpan @@ -18,25 +18,22 @@ # pylint: disable=no-member -class TestHostSummary(unittest.TestCase): - """Tests for nb_template.""" - - def test_host_summary_notebooklet(self): - """Test basic run of notebooklet.""" - test_data = str(Path(TEST_DATA_PATH).absolute()) - init( - query_provider="LocalData", - LocalData_data_paths=[test_data], - LocalData_query_paths=[test_data], - ) - - test_nb = nblts.azsent.host.HostSummary() - tspan = TimeSpan(period="1D") - - result = test_nb.run(value="myhost", timespan=tspan) - self.assertIsNotNone(result.host_entity) - self.assertIsNotNone(result.related_alerts) - self.assertIsInstance(result.related_alerts, pd.DataFrame) - self.assertIsNotNone(result.alert_timeline) - self.assertIsNotNone(result.related_bookmarks) - self.assertIsInstance(result.related_bookmarks, pd.DataFrame) +def test_host_summary_notebooklet(): + """Test basic run of notebooklet.""" + test_data = str(Path(TEST_DATA_PATH).absolute()) + init( + query_provider="LocalData", + LocalData_data_paths=[test_data], + LocalData_query_paths=[test_data], + ) + + test_nb = nblts.azsent.host.HostSummary() + tspan = TimeSpan(period="1D") + + result = test_nb.run(value="myhost", 
timespan=tspan) + check.is_not_none(result.host_entity) + check.is_not_none(result.related_alerts) + check.is_instance(result.related_alerts, pd.DataFrame) + check.is_not_none(result.alert_timeline) + check.is_not_none(result.related_bookmarks) + check.is_instance(result.related_bookmarks, pd.DataFrame) diff --git a/tests/nb/azsent/host/test_hostlogonsummary.py b/tests/nb/azsent/host/test_hostlogonsummary.py index 9e06ca4..44aca63 100644 --- a/tests/nb/azsent/host/test_hostlogonsummary.py +++ b/tests/nb/azsent/host/test_hostlogonsummary.py @@ -12,27 +12,28 @@ from bokeh.layouts import Column from bokeh.plotting import Figure from msticnb import nblts -from msticnb.data_providers import init +from msticnb import data_providers from msticpy.common.timespan import TimeSpan from msticpy.nbtools.foliummap import FoliumMap -from ....unit_test_lib import TEST_DATA_PATH +from ....unit_test_lib import TEST_DATA_PATH, GeoIPLiteMock # nosec # pylint: disable=no-member @pytest.fixture -def nbltdata(): +def nbltdata(monkeypatch): """Generate test nblt output.""" test_file = Path.cwd().joinpath(TEST_DATA_PATH).joinpath("lx_host_logons.pkl") - init("LocalData", providers=["tilookup"]) + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + data_providers.init("LocalData", providers=["tilookup", "geolitelookup"]) test_nblt = nblts.azsent.host.HostLogonsSummary() test_df = pd.read_pickle(test_file) return test_nblt.run(data=test_df, options=["-map"], silent=True) -def test_ouput_types(nbltdata): # pylint: disable=redefined-outer-name +def test_output_types(nbltdata): # pylint: disable=redefined-outer-name """Test nblt output types.""" assert isinstance(nbltdata.failed_success, pd.DataFrame) assert isinstance(nbltdata.logon_sessions, pd.DataFrame) @@ -42,21 +43,22 @@ def test_ouput_types(nbltdata): # pylint: disable=redefined-outer-name assert isinstance(nbltdata.timeline, Column) -def test_ouput_values(nbltdata): # pylint: disable=redefined-outer-name +def 
test_output_values(nbltdata): # pylint: disable=redefined-outer-name """Test nblt output values.""" assert nbltdata.failed_success.iloc[0]["LogonResult"] == "Success" assert nbltdata.logon_sessions.iloc[0]["HostName"] == "VictimHost" assert nbltdata.logon_matrix.index[0] == ("peteb", "sshd") -def test_local_data(): +def test_local_data(monkeypatch): """Test nblt output types and values using LocalData provider.""" test_data = str(Path.cwd().joinpath(TEST_DATA_PATH)) - init( + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + data_providers.init( query_provider="LocalData", LocalData_data_paths=[test_data], LocalData_query_paths=[test_data], - providers=["tilookup"], + providers=["tilookup", "geolitelookup"], ) test_nblt = nblts.azsent.host.HostLogonsSummary() diff --git a/tests/nb/azsent/host/test_win_host_events.py b/tests/nb/azsent/host/test_win_host_events.py index a1f0af5..2cff225 100644 --- a/tests/nb/azsent/host/test_win_host_events.py +++ b/tests/nb/azsent/host/test_win_host_events.py @@ -7,7 +7,7 @@ from pathlib import Path # from contextlib import redirect_stdout -import unittest +import pytest_check as check import pandas as pd @@ -21,31 +21,28 @@ # pylint: disable=no-member -class TestWinHostEvents(unittest.TestCase): - """Tests for nb_template.""" - - def test_winhostevents_notebooklet(self): - """Test basic run of notebooklet.""" - test_data = str(Path(TEST_DATA_PATH).absolute()) - init( - query_provider="LocalData", - LocalData_data_paths=[test_data], - LocalData_query_paths=[test_data], - ) - - test_nb = nblts.azsent.host.WinHostEvents() - tspan = TimeSpan(period="1D") - - result = test_nb.run(value="myhost", timespan=tspan) - self.assertIsNotNone(result.all_events) - self.assertIsInstance(result.all_events, pd.DataFrame) - self.assertIsNotNone(result.event_pivot) - self.assertIsInstance(result.event_pivot, pd.DataFrame) - self.assertIsNotNone(result.account_events) - self.assertIsInstance(result.account_events, pd.DataFrame) - 
self.assertIsNotNone(result.event_pivot) - self.assertIsInstance(result.event_pivot, pd.DataFrame) - # self.assertIsNotNone(result.account_timeline) - - exp_events = test_nb.expand_events(["5058", "5061"]) - self.assertIsInstance(exp_events, pd.DataFrame) +def test_winhostevents_notebooklet(): + """Test basic run of notebooklet.""" + test_data = str(Path(TEST_DATA_PATH).absolute()) + init( + query_provider="LocalData", + LocalData_data_paths=[test_data], + LocalData_query_paths=[test_data], + ) + + test_nb = nblts.azsent.host.WinHostEvents() + tspan = TimeSpan(period="1D") + + result = test_nb.run(value="myhost", timespan=tspan) + check.is_not_none(result.all_events) + check.is_instance(result.all_events, pd.DataFrame) + check.is_not_none(result.event_pivot) + check.is_instance(result.event_pivot, pd.DataFrame) + check.is_not_none(result.account_events) + check.is_instance(result.account_events, pd.DataFrame) + check.is_not_none(result.event_pivot) + check.is_instance(result.event_pivot, pd.DataFrame) + # check.is_not_none(result.account_timeline) + + exp_events = test_nb.expand_events(["5058", "5061"]) + check.is_instance(exp_events, pd.DataFrame) diff --git a/tests/nb/azsent/network/test_ip_summary.py b/tests/nb/azsent/network/test_ip_summary.py new file mode 100644 index 0000000..64efddd --- /dev/null +++ b/tests/nb/azsent/network/test_ip_summary.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+# -------------------------------------------------------------------------- +"""Test the nb_template class.""" +from pathlib import Path + +# from contextlib import redirect_stdout +import pytest_check as check + +from bokeh.models import LayoutDOM +import pandas as pd + +from msticpy.common.timespan import TimeSpan +from msticnb import nblts +from msticnb import data_providers + +from ....unit_test_lib import ( + TEST_DATA_PATH, + DEF_PROV_TABLES, + GeoIPLiteMock, + TILookupMock, +) + + +# pylint: disable=no-member + + +def test_ip_summary_notebooklet(monkeypatch): + """Test basic run of notebooklet.""" + test_data = str(Path(TEST_DATA_PATH).absolute()) + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + monkeypatch.setattr(data_providers, "TILookup", TILookupMock) + data_providers.init( + query_provider="LocalData", + LocalData_data_paths=[test_data], + LocalData_query_paths=[test_data], + providers=["tilookup", "geolitelookup"], + ) + + test_nb = nblts.azsent.network.IpAddressSummary() + tspan = TimeSpan(period="1D") + + result = test_nb.run(value="11.1.2.3", timespan=tspan) + check.is_not_none(result.ip_entity) + check.equal(result.ip_type, "Public") + check.equal(result.ip_origin, "External") + check.is_in("CountryCode", result.geoip) + check.is_not_none(result.location) + check.is_not_none(result.notebooklet) + check.is_not_none(result.whois) + check.is_instance(result.related_alerts, pd.DataFrame) + check.is_not_none(test_nb.browse_alerts()) + check.is_instance(result.passive_dns, pd.DataFrame) + check.is_instance(result.ti_results, pd.DataFrame) + + +def test_ip_summary_notebooklet_internal(monkeypatch): + """Test basic run of notebooklet.""" + test_data = str(Path(TEST_DATA_PATH).absolute()) + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + monkeypatch.setattr(data_providers, "TILookup", TILookupMock) + data_providers.init( + query_provider="LocalData", + LocalData_data_paths=[test_data], + 
LocalData_query_paths=[test_data], + providers=["tilookup", "geolitelookup"], + ) + + test_nb = nblts.azsent.network.IpAddressSummary() + tspan = TimeSpan(period="1D") + + test_nb.query_provider.schema.update({tab: {} for tab in DEF_PROV_TABLES}) + result = test_nb.run(value="40.76.43.124", timespan=tspan) + check.is_not_none(result.ip_entity) + check.equal(result.ip_type, "Public") + check.equal(result.ip_origin, "Internal") + check.is_not_none(result.whois) + check.is_instance(result.related_alerts, pd.DataFrame) + check.is_instance(result.heartbeat, pd.DataFrame) + check.is_instance(result.az_network_if, pd.DataFrame) + check.is_none(result.passive_dns) + check.is_none(result.ti_results) + + +def test_ip_summary_notebooklet_all(monkeypatch): + """Test basic run of notebooklet.""" + test_data = str(Path(TEST_DATA_PATH).absolute()) + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + monkeypatch.setattr(data_providers, "TILookup", TILookupMock) + data_providers.init( + query_provider="LocalData", + LocalData_data_paths=[test_data], + LocalData_query_paths=[test_data], + providers=["tilookup", "geolitelookup"], + ) + + opts = ["+az_netflow", "+passive_dns", "+az_activity", "+office_365", "+ti"] + test_nb = nblts.azsent.network.IpAddressSummary() + tspan = TimeSpan(period="1D") + test_nb.query_provider.schema.update({tab: {} for tab in DEF_PROV_TABLES}) + + result = test_nb.run(value="40.76.43.124", timespan=tspan, options=opts) + check.is_not_none(result.ip_entity) + check.is_not_none(result.host_entity) + check.equal(result.host_entity.HostName, "MSTICAlertsWin1") + check.equal(result.host_entity.OSFamily.name, "Linux") + check.equal(result.ip_type, "Public") + check.equal(result.ip_origin, "Internal") + check.is_instance(result.heartbeat, pd.DataFrame) + check.is_instance(result.az_network_if, pd.DataFrame) + check.is_instance(result.az_network_flows, pd.DataFrame) + check.is_instance(result.az_network_flow_summary, pd.DataFrame) + 
check.is_instance(result.az_network_flows_timeline, LayoutDOM) + check.is_instance(result.aad_signins, pd.DataFrame) + check.is_instance(result.office_activity, pd.DataFrame) + check.is_instance(result.vmcomputer, pd.DataFrame) + + check.is_instance(test_nb.netflow_total_by_protocol(), LayoutDOM) + check.is_instance(test_nb.netflow_by_direction(), LayoutDOM) + + check.is_not_none(result.whois) + check.is_instance(result.related_alerts, pd.DataFrame) + check.is_instance(result.passive_dns, pd.DataFrame) + check.is_instance(result.ti_results, pd.DataFrame) diff --git a/tests/nb/azsent/network/test_network_flow_summary.py b/tests/nb/azsent/network/test_network_flow_summary.py index d37b20d..8839939 100644 --- a/tests/nb/azsent/network/test_network_flow_summary.py +++ b/tests/nb/azsent/network/test_network_flow_summary.py @@ -7,45 +7,55 @@ from pathlib import Path # from contextlib import redirect_stdout -import unittest +import pytest_check as check from bokeh.models import LayoutDOM import pandas as pd from msticpy.common.timespan import TimeSpan from msticnb import nblts -from msticnb.data_providers import init +from msticnb import data_providers -from ....unit_test_lib import TEST_DATA_PATH +from ....unit_test_lib import ( + TEST_DATA_PATH, + DEF_PROV_TABLES, + GeoIPLiteMock, + TILookupMock, +) # pylint: disable=no-member -class TestNetworkFlowSummary(unittest.TestCase): - """Tests for nb_template.""" - - def test_network_flow_summary_notebooklet(self): - """Test basic run of notebooklet.""" - test_data = str(Path(TEST_DATA_PATH).absolute()) - init( - query_provider="LocalData", - LocalData_data_paths=[test_data], - LocalData_query_paths=[test_data], - ) - - test_nb = nblts.azsent.network.NetworkFlowSummary() - tspan = TimeSpan(period="1D") - - result = test_nb.run(value="myhost", timespan=tspan) - self.assertIsNotNone(result.host_entity) - self.assertIsNotNone(result.network_flows) - self.assertIsInstance(result.network_flows, pd.DataFrame) - 
self.assertIsNotNone(result.plot_flows_by_protocol) - self.assertIsInstance(result.plot_flows_by_protocol, LayoutDOM) - self.assertIsNotNone(result.plot_flows_by_direction) - self.assertIsInstance(result.plot_flows_by_direction, LayoutDOM) - self.assertIsNotNone(result.plot_flow_values) - self.assertIsInstance(result.plot_flow_values, LayoutDOM) - self.assertIsNotNone(result.flow_index) - self.assertIsInstance(result.flow_summary, pd.DataFrame) +def test_network_flow_summary_notebooklet(monkeypatch): + """Test basic run of notebooklet.""" + test_data = str(Path(TEST_DATA_PATH).absolute()) + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + monkeypatch.setattr(data_providers, "TILookup", TILookupMock) + data_providers.init( + query_provider="LocalData", + LocalData_data_paths=[test_data], + LocalData_query_paths=[test_data], + ) + + test_nb = nblts.azsent.network.NetworkFlowSummary() + tspan = TimeSpan(period="1D") + + test_nb.query_provider.schema.update({tab: {} for tab in DEF_PROV_TABLES}) + options = ["+geo_map"] + result = test_nb.run(value="myhost", timespan=tspan, options=options) + check.is_not_none(result.host_entity) + check.is_not_none(result.network_flows) + check.is_instance(result.network_flows, pd.DataFrame) + check.is_not_none(result.plot_flows_by_protocol) + check.is_instance(result.plot_flows_by_protocol, LayoutDOM) + check.is_not_none(result.plot_flows_by_direction) + check.is_instance(result.plot_flows_by_direction, LayoutDOM) + check.is_not_none(result.plot_flow_values) + check.is_instance(result.plot_flow_values, LayoutDOM) + check.is_not_none(result.flow_index) + check.is_instance(result.flow_summary, pd.DataFrame) + + result.select_asns() + result.lookup_ti_for_asn_ips() + result.show_selected_asn_map() diff --git a/tests/nb/template/test_nb_template.py b/tests/nb/template/test_nb_template.py index 0912c61..8f3586b 100644 --- a/tests/nb/template/test_nb_template.py +++ b/tests/nb/template/test_nb_template.py @@ -7,7 +7,7 
@@ from pathlib import Path # from contextlib import redirect_stdout -import unittest +import pytest_check as check import pandas as pd @@ -17,30 +17,25 @@ from ...unit_test_lib import TEST_DATA_PATH -class TestTemplateNB(unittest.TestCase): - """Tests for nb_template.""" +def test_template_notebooklet(): + """Test basic run of notebooklet.""" + test_data = str(Path(TEST_DATA_PATH).absolute()) + init( + query_provider="LocalData", + LocalData_data_paths=[test_data], + LocalData_query_paths=[test_data], + ) - def test_template_notebooklet(self): - """Test basic run of notebooklet.""" - test_data = str(Path(TEST_DATA_PATH).absolute()) - init( - query_provider="LocalData", - LocalData_data_paths=[test_data], - LocalData_query_paths=[test_data], - ) + test_nb = TemplateNB() + tspan = TimeSpan(period="1D") - test_nb = TemplateNB() - tspan = TimeSpan(period="1D") + result = test_nb.run(value="myhost", timespan=tspan) + check.is_not_none(result.all_events) + check.is_not_none(result.description) + check.is_not_none(result.plot) - result = test_nb.run(value="myhost", timespan=tspan) - self.assertIsNotNone(result.all_events) - self.assertIsNotNone(result.description) - self.assertIsNotNone(result.plot) + result = test_nb.run(value="myhost", timespan=tspan, options=["+get_metadata"]) + check.is_not_none(result.additional_info) - result = test_nb.run(value="myhost", timespan=tspan, options=["+get_metadata"]) - self.assertIsNotNone(result.additional_info) - - evts = test_nb.run_additional_operation( - ["4679", "5058", "5061", "5059", "4776"] - ) - self.assertIsInstance(evts, pd.DataFrame) + evts = test_nb.run_additional_operation(["4679", "5058", "5061", "5059", "4776"]) + check.is_instance(evts, pd.DataFrame) diff --git a/tests/test_common.py b/tests/test_common.py index 4315ab6..65205eb 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -6,9 +6,10 @@ """common test class.""" from contextlib import redirect_stdout import io -import unittest import warnings 
+import pytest +import pytest_check as check from msticnb.common import add_result, nb_data_wait, nb_debug, nb_print from msticnb import options, init from msticnb.options import get_opt, set_opt @@ -17,116 +18,116 @@ # pylint: disable=too-many-statements -class TestCommon(unittest.TestCase): - """Unit test class.""" - - def test_print_methods(self): - """Test method.""" - set_opt("verbose", True) - f_stream = io.StringIO() - with redirect_stdout(f_stream): - nb_print("status") - nb_data_wait("table1") - self.assertIn("status", str(f_stream.getvalue())) - self.assertIn("Getting data from table1", str(f_stream.getvalue())) - - set_opt("verbose", False) - f_stream = io.StringIO() - with redirect_stdout(f_stream): - nb_print("status") - self.assertNotIn("status", str(f_stream.getvalue())) - self.assertNotIn("Getting data from table1", str(f_stream.getvalue())) - - set_opt("debug", True) - f_stream = io.StringIO() - with redirect_stdout(f_stream): - nb_debug("debug", "debugmssg", "val", 1, "result", True) - self.assertIn("debug", str(f_stream.getvalue())) - self.assertIn("debugmssg", str(f_stream.getvalue())) - self.assertIn("val", str(f_stream.getvalue())) - self.assertIn("1", str(f_stream.getvalue())) - self.assertIn("result", str(f_stream.getvalue())) - self.assertIn("True", str(f_stream.getvalue())) - - def test_add_result_decorator(self): - """Test method.""" - # pylint: disable=too-few-public-methods - class _TestClass: - prop1 = None - prop2 = None - - test_obj = _TestClass() - - @add_result(result=test_obj, attr_name=["prop1", "prop2"]) - def test_func(): - return "result1", 10 - - test_func() - self.assertEqual("result1", test_obj.prop1) - self.assertEqual(10, test_obj.prop2) - - def test_options(self): - """Test method.""" - set_opt("verbose", True) - f_stream = io.StringIO() - with redirect_stdout(f_stream): - options.current() - self.assertIn("verbose: True", str(f_stream.getvalue())) - - f_stream = io.StringIO() - with redirect_stdout(f_stream): - 
options.show() - self.assertIn( - "verbose (default=True): Show progress messages.", str(f_stream.getvalue()) - ) - - with self.assertRaises(KeyError): - get_opt("no_option") - - with self.assertRaises(KeyError): - set_opt("no_option", "value") - - # This will work since bool(10) == True - set_opt("verbose", 10) - - @staticmethod - def _capture_nb_run_output(test_nb, **kwargs): - f_stream = io.StringIO() - with redirect_stdout(f_stream): - test_nb.run(**kwargs) - return str(f_stream.getvalue()) - - def test_silent_option(self): - """Test operation of 'silent' option.""" - warnings.filterwarnings(action="ignore", category=UserWarning) - init(query_provider="LocalData", providers=[]) - test_nb = TstNBSummary() - - output = self._capture_nb_run_output(test_nb) - self.assertTrue(output) - - # Silent option to run - output = self._capture_nb_run_output(test_nb, silent=True) - self.assertFalse(output) - self.assertTrue(get_opt("silent")) - - # Silent option to init - test_nb = TstNBSummary(silent=True) - self.assertTrue(test_nb.silent) - output = self._capture_nb_run_output(test_nb) - self.assertFalse(output) - - # But overridable on run - output = self._capture_nb_run_output(test_nb, silent=False) - self.assertTrue(output) - self.assertFalse(get_opt("silent")) - - # Silent global option - set_opt("silent", True) - test_nb = TstNBSummary() - output = self._capture_nb_run_output(test_nb) - self.assertFalse(output) - - # But overridable on run - output = self._capture_nb_run_output(test_nb, silent=False) - self.assertTrue(output) +def test_print_methods(): + """Test method.""" + set_opt("verbose", True) + f_stream = io.StringIO() + with redirect_stdout(f_stream): + nb_print("status") + nb_data_wait("table1") + check.is_in("status", str(f_stream.getvalue())) + check.is_in("Getting data from table1", str(f_stream.getvalue())) + + set_opt("verbose", False) + f_stream = io.StringIO() + with redirect_stdout(f_stream): + nb_print("status") + check.is_not_in("status", 
str(f_stream.getvalue())) + check.is_not_in("Getting data from table1", str(f_stream.getvalue())) + + set_opt("debug", True) + f_stream = io.StringIO() + with redirect_stdout(f_stream): + nb_debug("debug", "debugmssg", "val", 1, "result", True) + check.is_in("debug", str(f_stream.getvalue())) + check.is_in("debugmssg", str(f_stream.getvalue())) + check.is_in("val", str(f_stream.getvalue())) + check.is_in("1", str(f_stream.getvalue())) + check.is_in("result", str(f_stream.getvalue())) + check.is_in("True", str(f_stream.getvalue())) + + +def test_add_result_decorator(): + """Test method.""" + # pylint: disable=too-few-public-methods + class _TestClass: + prop1 = None + prop2 = None + + test_obj = _TestClass() + + @add_result(result=test_obj, attr_name=["prop1", "prop2"]) + def test_func(): + return "result1", 10 + + test_func() + check.equal("result1", test_obj.prop1) + check.equal(10, test_obj.prop2) + + +def test_options(): + """Test method.""" + set_opt("verbose", True) + f_stream = io.StringIO() + with redirect_stdout(f_stream): + options.current() + check.is_in("verbose: True", str(f_stream.getvalue())) + + f_stream = io.StringIO() + with redirect_stdout(f_stream): + options.show() + check.is_in( + "verbose (default=True): Show progress messages.", str(f_stream.getvalue()) + ) + + with pytest.raises(KeyError): + get_opt("no_option") + + with pytest.raises(KeyError): + set_opt("no_option", "value") + + # This will work since bool(10) == True + set_opt("verbose", 10) + + +def _capture_nb_run_output(test_nb, **kwargs): + f_stream = io.StringIO() + with redirect_stdout(f_stream): + test_nb.run(**kwargs) + return str(f_stream.getvalue()) + + +def test_silent_option(): + """Test operation of 'silent' option.""" + warnings.filterwarnings(action="ignore", category=UserWarning) + init(query_provider="LocalData", providers=[]) + test_nb = TstNBSummary() + + output = _capture_nb_run_output(test_nb) + check.is_true(output) + + # Silent option to run + output = 
_capture_nb_run_output(test_nb, silent=True) + check.is_false(output) + check.is_true(get_opt("silent")) + + # Silent option to init + test_nb = TstNBSummary(silent=True) + check.is_true(test_nb.silent) + output = _capture_nb_run_output(test_nb) + check.is_false(output) + + # But overridable on run + output = _capture_nb_run_output(test_nb, silent=False) + check.is_true(output) + check.is_false(get_opt("silent")) + + # Silent global option + set_opt("silent", True) + test_nb = TstNBSummary() + output = _capture_nb_run_output(test_nb) + check.is_false(output) + + # But overridable on run + output = _capture_nb_run_output(test_nb, silent=False) + check.is_true(output) diff --git a/tests/test_dataprovider.py b/tests/test_dataprovider.py index 2747d83..81badc7 100644 --- a/tests/test_dataprovider.py +++ b/tests/test_dataprovider.py @@ -5,7 +5,8 @@ # -------------------------------------------------------------------------- """data_providers test class.""" import sys -import unittest + +import pytest_check as check from msticpy.data import QueryProvider from msticpy.sectools.geoip import GeoLiteLookup from msticpy.sectools import TILookup @@ -16,66 +17,65 @@ # pylint: disable=no-member -class TestDataProviders(unittest.TestCase): - """Unit test class.""" - - def test_init_data_providers(self): - """Test creating DataProviders instance.""" - dprov = DataProviders(query_provider="LocalData") - - self.assertIsNotNone(dprov) - self.assertIs(dprov, DataProviders.current()) - - self.assertIn("LocalData", dprov.providers) - self.assertIn("geolitelookup", dprov.providers) - self.assertIn("tilookup", dprov.providers) - self.assertIsInstance(dprov.providers["LocalData"], QueryProvider) - self.assertIsInstance(dprov.providers["geolitelookup"], GeoLiteLookup) - self.assertIsInstance(dprov.providers["tilookup"], TILookup) - - def test_new_init_data_providers(self): - """Test creating new provider with new provider list.""" - init(query_provider="LocalData", providers=[]) - dprov = 
DataProviders.current() - init(query_provider="LocalData", providers=[]) - dprov2 = DataProviders.current() - self.assertIs(dprov2, dprov) - - # specify provider - dprov = DataProviders(query_provider="LocalData") - init(query_provider="LocalData", providers=["tilookup"]) - msticnb = sys.modules["msticnb"] - dprov2 = DataProviders.current() - pkg_providers = getattr(msticnb, "data_providers") - self.assertIsNot(dprov2, dprov) - self.assertIn("LocalData", dprov2.providers) - self.assertIn("tilookup", dprov2.providers) - self.assertNotIn("geolitelookup", dprov2.providers) - self.assertNotIn("ipstacklookup", dprov2.providers) - self.assertIn("LocalData", pkg_providers) - self.assertIn("tilookup", pkg_providers) - self.assertNotIn("geolitelookup", pkg_providers) - self.assertNotIn("ipstacklookup", pkg_providers) - - self.assertIsInstance(dprov2.providers["tilookup"], TILookup) - - def test_add_sub_data_providers(self): - """Test intializing adding and subtracting providers.""" - dprov = DataProviders(query_provider="LocalData") - init(query_provider="LocalData", providers=["tilookup"]) - msticnb = sys.modules["msticnb"] - dprov2 = DataProviders.current() - - # Add and remove a provider from defaults - init(query_provider="LocalData", providers=["+ipstacklookup", "-geolitelookup"]) - - dprov3 = DataProviders.current() - pkg_providers = getattr(msticnb, "data_providers") - self.assertIsNot(dprov3, dprov) - self.assertIsNot(dprov3, dprov2) - self.assertIn("ipstacklookup", dprov3.providers) - self.assertNotIn("geolitelookup", dprov3.providers) - self.assertIn("tilookup", dprov3.providers) - self.assertIn("ipstacklookup", pkg_providers) - self.assertNotIn("geolitelookup", pkg_providers) - self.assertIn("tilookup", pkg_providers) +def test_init_data_providers(): + """Test creating DataProviders instance.""" + dprov = DataProviders(query_provider="LocalData") + + check.is_not_none(dprov) + check.equal(dprov, DataProviders.current()) + + check.is_in("LocalData", 
dprov.providers) + check.is_in("geolitelookup", dprov.providers) + check.is_in("tilookup", dprov.providers) + check.is_instance(dprov.providers["LocalData"], QueryProvider) + check.is_instance(dprov.providers["geolitelookup"], GeoLiteLookup) + check.is_instance(dprov.providers["tilookup"], TILookup) + + +def test_new_init_data_providers(): + """Test creating new provider with new provider list.""" + init(query_provider="LocalData", providers=[]) + dprov = DataProviders.current() + init(query_provider="LocalData", providers=[]) + dprov2 = DataProviders.current() + check.equal(dprov2, dprov) + + # specify provider + dprov = DataProviders(query_provider="LocalData") + init(query_provider="LocalData", providers=["tilookup"]) + msticnb = sys.modules["msticnb"] + dprov2 = DataProviders.current() + pkg_providers = getattr(msticnb, "data_providers") + check.not_equal(dprov2, dprov) + check.is_in("LocalData", dprov2.providers) + check.is_in("tilookup", dprov2.providers) + check.is_not_in("geolitelookup", dprov2.providers) + check.is_not_in("ipstacklookup", dprov2.providers) + check.is_in("LocalData", pkg_providers) + check.is_in("tilookup", pkg_providers) + check.is_not_in("geolitelookup", pkg_providers) + check.is_not_in("ipstacklookup", pkg_providers) + + check.is_instance(dprov2.providers["tilookup"], TILookup) + + +def test_add_sub_data_providers(): + """Test intializing adding and subtracting providers.""" + dprov = DataProviders(query_provider="LocalData") + init(query_provider="LocalData", providers=["tilookup"]) + msticnb = sys.modules["msticnb"] + dprov2 = DataProviders.current() + + # Add and remove a provider from defaults + init(query_provider="LocalData", providers=["+ipstacklookup", "-geolitelookup"]) + + dprov3 = DataProviders.current() + pkg_providers = getattr(msticnb, "data_providers") + check.not_equal(dprov3, dprov) + check.not_equal(dprov3, dprov2) + check.is_in("ipstacklookup", dprov3.providers) + check.is_not_in("geolitelookup", dprov3.providers) + 
check.is_in("tilookup", dprov3.providers) + check.is_in("ipstacklookup", pkg_providers) + check.is_not_in("geolitelookup", pkg_providers) + check.is_in("tilookup", pkg_providers) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 3eb1340..00f4d6b 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -4,58 +4,52 @@ # license information. # -------------------------------------------------------------------------- """NB metadata test class.""" -import unittest - +import pytest_check as check from msticnb import init from msticnb.nb_metadata import NBMetadata, read_mod_metadata from msticnb.nb.azsent.host import host_summary -class TestMetadata(unittest.TestCase): - """Unit test class.""" +def test_read_metadata(): + """Tests reading metadata yaml file.""" + nb_md, docs = read_mod_metadata(host_summary.__file__, host_summary.__name__) + check.is_instance(nb_md, NBMetadata) + check.is_instance(docs, dict) - def test_read_metadata(self): - """Tests reading metadata yaml file.""" - nb_md, docs = read_mod_metadata(host_summary.__file__, host_summary.__name__) - self.assertIsInstance(nb_md, NBMetadata) - self.assertIsInstance(docs, dict) + opts = nb_md.get_options("all") + check.is_in("heartbeat", [opt[0] for opt in opts]) + check.is_in("alerts", [opt[0] for opt in opts]) - opts = nb_md.get_options("all") - self.assertIn("heartbeat", [opt[0] for opt in opts]) - self.assertIn("alerts", [opt[0] for opt in opts]) + for item in ("Default Options", "alerts", "azure_api"): + check.is_in(item, nb_md.options_doc) - for item in ("Default Options", "alerts", "azure_api"): - self.assertIn(item, nb_md.options_doc) + for item in ("Default Options", "alerts", "azure_api"): + check.is_in(item, nb_md.options_doc) - # try adding metadata to this class docstring - self.__class__.__doc__ += nb_md.options_doc - self.assertTrue(self.__class__.__doc__) - for item in ("Default Options", "alerts", "azure_api"): - self.assertIn(item, self.__class__.__doc__) - # 
pylint: disable=protected-access - def test_class_metadata(self): - """Test class correctly loads yaml metadata.""" - init(query_provider="LocalData", providers=["tilookup"]) - host_nb = host_summary.HostSummary() +# pylint: disable=protected-access +def test_class_metadata(): + """Test class correctly loads yaml metadata.""" + init(query_provider="LocalData", providers=["tilookup"]) + host_nb = host_summary.HostSummary() - self.assertTrue(hasattr(host_summary, "_CLS_METADATA")) - self.assertIsInstance(host_summary._CLS_METADATA, NBMetadata) - self.assertTrue(hasattr(host_summary, "_CELL_DOCS")) - self.assertIsInstance(host_summary._CELL_DOCS, dict) + check.is_true(hasattr(host_summary, "_CLS_METADATA")) + check.is_instance(host_summary._CLS_METADATA, NBMetadata) + check.is_true(hasattr(host_summary, "_CELL_DOCS")) + check.is_instance(host_summary._CELL_DOCS, dict) - self.assertTrue(hasattr(host_nb, "metadata")) - self.assertIsInstance(host_nb.metadata, NBMetadata) - self.assertEqual(host_nb.metadata.mod_name, host_summary.__name__) - self.assertEqual(host_nb.description(), "Host summary") - self.assertEqual(host_nb.name(), "HostSummary") - self.assertIn("host", host_nb.entity_types()) - self.assertIn("host", host_nb.keywords()) + check.is_true(hasattr(host_nb, "metadata")) + check.is_instance(host_nb.metadata, NBMetadata) + check.equal(host_nb.metadata.mod_name, host_summary.__name__) + check.equal(host_nb.description(), "Host summary") + check.equal(host_nb.name(), "HostSummary") + check.is_in("host", host_nb.entity_types()) + check.is_in("host", host_nb.keywords()) - self.assertIn("heartbeat", host_nb.default_options()) - self.assertIn("alerts", host_nb.default_options()) + check.is_in("heartbeat", host_nb.default_options()) + check.is_in("alerts", host_nb.default_options()) - self.assertIn("alerts", host_nb.all_options()) + check.is_in("alerts", host_nb.all_options()) - for item in ("Default Options", "alerts", "azure_api"): - self.assertIn(item, 
host_nb.list_options()) + for item in ("Default Options", "alerts", "azure_api"): + check.is_in(item, host_nb.list_options()) diff --git a/tests/test_nb_pivot.py b/tests/test_nb_pivot.py index a6b61c9..e3bac2b 100644 --- a/tests/test_nb_pivot.py +++ b/tests/test_nb_pivot.py @@ -12,11 +12,11 @@ from msticpy.datamodel import entities from msticpy.datamodel.pivot import Pivot -from msticnb.data_providers import init +from msticnb import data_providers from msticnb.nb_pivot import add_pivot_funcs from msticnb.notebooklet import NotebookletResult -from .unit_test_lib import TEST_DATA_PATH +from .unit_test_lib import TEST_DATA_PATH, GeoIPLiteMock __author__ = "Ian Hellen" @@ -31,26 +31,30 @@ "win_host_events", "network_flow_summary", ), + "test_host", ), - ("Account", ("account_summary",)), - ("IpAddress", ("network_flow_summary",)), + ("Account", ("account_summary",), "test_acct"), + ("IpAddress", ("network_flow_summary",), "11.1.2.3"), ] -@pytest.fixture(scope="session") -def _init_pivot(): +@pytest.fixture +def _init_pivot(monkeypatch): test_data = str(Path(TEST_DATA_PATH).absolute()) - init( + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + data_providers.init( query_provider="LocalData", + providers=["geolitelookup"], LocalData_data_paths=[test_data], LocalData_query_paths=[test_data], ) return Pivot() -@pytest.mark.parametrize("ent_name, funcs", _EXPECTED_FUNCS) -def test_add_pivot_funcs(_init_pivot, ent_name, funcs): +@pytest.mark.parametrize("ent_name, funcs, test_val", _EXPECTED_FUNCS) +def test_add_pivot_funcs(_init_pivot, ent_name, funcs, test_val): """Test adding notebooklets to pivot.""" + del test_val add_pivot_funcs(_init_pivot) entity = getattr(entities, ent_name) @@ -59,9 +63,10 @@ def test_add_pivot_funcs(_init_pivot, ent_name, funcs): check.is_true(hasattr(container, func_name)) -@pytest.mark.parametrize("ent_name, funcs", _EXPECTED_FUNCS) -def test_run_pivot_funcs(_init_pivot, ent_name, funcs): 
+@pytest.mark.parametrize("ent_name, funcs, test_val", _EXPECTED_FUNCS) +def test_run_pivot_funcs(_init_pivot, ent_name, funcs, test_val): """Test running notebooklets run functions.""" + del funcs add_pivot_funcs(_init_pivot) entity = getattr(entities, ent_name) @@ -70,7 +75,7 @@ def test_run_pivot_funcs(_init_pivot, ent_name, funcs): container = getattr(entity, "nblt") for _, p_func in container: check.is_true(callable(p_func)) - result = p_func(value="testhost") + result = p_func(value=test_val) test_result = result[0] if isinstance(result, list) else result check.is_true(isinstance(test_result, NotebookletResult)) check.equal(test_result.timespan, _init_pivot.get_timespan()) diff --git a/tests/test_notebooklet.py b/tests/test_notebooklet.py index ac8cead..6c2aaea 100644 --- a/tests/test_notebooklet.py +++ b/tests/test_notebooklet.py @@ -5,151 +5,155 @@ # -------------------------------------------------------------------------- """common test class.""" # from contextlib import redirect_stdout -import unittest from contextlib import redirect_stdout from io import StringIO +import pytest +import pytest_check as check + import pandas as pd from lxml import etree # nosec from markdown import markdown from msticnb.common import MsticnbDataProviderError -from msticnb.data_providers import init +from msticnb import data_providers from msticnb.nb.azsent.host.host_summary import HostSummaryResult from msticnb.read_modules import Notebooklet, nblts -from msticpy.common.exceptions import MsticpyUserConfigError from msticpy.common.timespan import TimeSpan -from msticpy.sectools import GeoLiteLookup from .nb_test import TstNBSummary +from .unit_test_lib import GeoIPLiteMock # pylint: disable=c-extension-no-member, protected-access -class TestNotebooklet(unittest.TestCase): - """Unit test class.""" - - def test_notebooklet_create(self): - """Test method.""" - test_with_geop = True - try: - geoip = GeoLiteLookup() - if not geoip._api_key: - test_with_geop = False - del geoip 
- except MsticpyUserConfigError: - test_with_geop = False - - if test_with_geop: - # Should run because required providers are loaded - init(query_provider="LocalData", providers=["tilookup", "geolitelookup"]) - for _, nblt in nblts.iter_classes(): - new_nblt = nblt() - self.assertIsInstance(new_nblt, Notebooklet) - self.assertIsNone(new_nblt.result) - - # Should throw a warning because of unrecognized provider - init(query_provider="LocalData") - with self.assertRaises(MsticnbDataProviderError) as err: - for _, nblt in nblts.iter_classes(): - curr_provs = nblt.metadata.req_providers - bad_provs = [*curr_provs, "bad_provider"] - try: - nblt.metadata.req_providers = bad_provs - new_nblt = nblt() - self.assertIsInstance(new_nblt, Notebooklet) - self.assertIsNone(new_nblt.result) - finally: - nblt.metadata.req_providers = curr_provs - self.assertIn("bad_provider", err.exception.args[0]) - test_nb = TstNBSummary() - self.assertIsNotNone(test_nb.get_provider("LocalData")) - with self.assertRaises(MsticnbDataProviderError): - test_nb.get_provider("otherprovider") - - def test_notebooklet_params(self): - """Test supplying timespan param.""" - init(query_provider="LocalData", providers=["tilookup"]) - test_nb = TstNBSummary() - - tspan = TimeSpan(period="1D") - test_nb.run(timespan=tspan) - self.assertEqual(tspan, test_nb.timespan) - - test_nb.run(start=tspan.start, end=tspan.end) - self.assertEqual(tspan, test_nb.timespan) - - def test_notebooklet_options(self): - """Test option logic for notebooklet.""" - init(query_provider="LocalData") - nb_test = TstNBSummary() - - # default options - nb_res = nb_test.run() - self.assertIsNotNone(nb_res.default_property) - self.assertIsNone(nb_res.optional_property) - - # add optional option - nb_res = nb_test.run(options=["+optional_opt"]) - self.assertIsNotNone(nb_res.default_property) - self.assertIsNotNone(nb_res.optional_property) - - # remove default option - nb_res = nb_test.run(options=["-default_opt"]) - 
self.assertIsNone(nb_res.default_property) - self.assertIsNone(nb_res.optional_property) - - # specific options - nb_res = nb_test.run(options=["heartbest", "azure_net"]) - self.assertIsNone(nb_res.default_property) - self.assertIsNone(nb_res.optional_property) - - # invalid option - f_stream = StringIO() - with redirect_stdout(f_stream): - nb_test.run(options=["invalid_opt"]) - output = str(f_stream.getvalue()) - self.assertIn("Invalid options ['invalid_opt']", output) - - def test_class_doc(self): - """Test class documentation.""" - for _, nblt in nblts.iter_classes(): - html_doc = nblt.get_help() - self.assertNotEqual(html_doc, "No documentation available.") - self.assertGreater(len(html_doc), 100) - md_doc = nblt.get_help(fmt="md") - html_doc2 = markdown(md_doc) - self.assertEqual(html_doc, html_doc2) - _html_parser = etree.HTMLParser(recover=False) - elem_tree = etree.parse(StringIO(html_doc), _html_parser) - self.assertIsNotNone(elem_tree) - - def test_class_methods(self): - """Test method.""" +def test_notebooklet_create(monkeypatch): + """Test method.""" + # Should run because required providers are loaded + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + data_providers.init( + query_provider="LocalData", providers=["tilookup", "geolitelookup"] + ) + for _, nblt in nblts.iter_classes(): + new_nblt = nblt() + check.is_instance(new_nblt, Notebooklet) + check.is_none(new_nblt.result) + + # Should throw a warning because of unrecognized provider + data_providers.init(query_provider="LocalData") + with pytest.raises(MsticnbDataProviderError) as err: for _, nblt in nblts.iter_classes(): - self.assertIsNotNone(nblt.description()) - self.assertIsNotNone(nblt.name()) - self.assertGreater(len(nblt.all_options()), 0) - self.assertGreater(len(nblt.default_options()), 0) - self.assertGreater(len(nblt.keywords()), 0) - self.assertGreater(len(nblt.entity_types()), 0) - metadata = nblt.get_settings(print_settings=False) - 
self.assertIsNotNone(metadata) - self.assertIn("mod_name", metadata) - self.assertIn("default_options", metadata) - self.assertIn("keywords", metadata) - - def test_nbresult(self): - """Test method.""" - host_result = HostSummaryResult() - host_result.host_entity = {"host_name": "myhost"} - host_result.related_alerts = pd.DataFrame() - host_result.related_bookmarks = pd.DataFrame() - self.assertIn("host_entity:", str(host_result)) - self.assertIn("DataFrame:", str(host_result)) - self.assertIn("host_entity", host_result.properties) - - html_doc = host_result._repr_html_() + curr_provs = nblt.metadata.req_providers + bad_provs = [*curr_provs, "bad_provider"] + try: + nblt.metadata.req_providers = bad_provs + new_nblt = nblt() + check.is_instance(new_nblt, Notebooklet) + check.is_none(new_nblt.result) + finally: + nblt.metadata.req_providers = curr_provs + check.is_in("bad_provider", err.value.args[0]) + test_nb = TstNBSummary() + check.is_not_none(test_nb.get_provider("LocalData")) + with pytest.raises(MsticnbDataProviderError): + test_nb.get_provider("otherprovider") + + +def test_notebooklet_params(monkeypatch): + """Test supplying timespan param.""" + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + data_providers.init( + query_provider="LocalData", providers=["tilookup", "geolitelookup"] + ) + test_nb = TstNBSummary() + + tspan = TimeSpan(period="1D") + test_nb.run(timespan=tspan) + check.equal(tspan, test_nb.timespan) + + test_nb.run(start=tspan.start, end=tspan.end) + check.equal(tspan, test_nb.timespan) + + +def test_notebooklet_options(monkeypatch): + """Test option logic for notebooklet.""" + monkeypatch.setattr(data_providers, "GeoLiteLookup", GeoIPLiteMock) + data_providers.init( + query_provider="LocalData", providers=["tilookup", "geolitelookup"] + ) + nb_test = TstNBSummary() + + # default options + nb_res = nb_test.run() + check.is_not_none(nb_res.default_property) + check.is_none(nb_res.optional_property) + + # add optional 
option + nb_res = nb_test.run(options=["+optional_opt"]) + check.is_not_none(nb_res.default_property) + check.is_not_none(nb_res.optional_property) + + # remove default option + nb_res = nb_test.run(options=["-default_opt"]) + check.is_none(nb_res.default_property) + check.is_none(nb_res.optional_property) + + # specific options + nb_res = nb_test.run(options=["heartbest", "azure_net"]) + check.is_none(nb_res.default_property) + check.is_none(nb_res.optional_property) + + # invalid option + f_stream = StringIO() + with redirect_stdout(f_stream): + nb_test.run(options=["invalid_opt"]) + output = str(f_stream.getvalue()) + check.is_in("Invalid options ['invalid_opt']", output) + + +def test_class_doc(): + """Test class documentation.""" + for _, nblt in nblts.iter_classes(): + html_doc = nblt.get_help() + check.not_equal(html_doc, "No documentation available.") + check.greater(len(html_doc), 100) + + md_doc = nblt.get_help(fmt="md") + html_doc2 = markdown(md_doc) + check.equal(html_doc, html_doc2) + _html_parser = etree.HTMLParser(recover=False) elem_tree = etree.parse(StringIO(html_doc), _html_parser) - self.assertIsNotNone(elem_tree) + check.is_not_none(elem_tree) + + +def test_class_methods(): + """Test method.""" + for _, nblt in nblts.iter_classes(): + check.is_not_none(nblt.description()) + check.is_not_none(nblt.name()) + check.greater(len(nblt.all_options()), 0) + check.greater(len(nblt.default_options()), 0) + check.greater(len(nblt.keywords()), 0) + check.greater(len(nblt.entity_types()), 0) + metadata = nblt.get_settings(print_settings=False) + check.is_not_none(metadata) + check.is_in("mod_name", metadata) + check.is_in("default_options", metadata) + check.is_in("keywords", metadata) + + +def test_nbresult(): + """Test method.""" + host_result = HostSummaryResult() + host_result.host_entity = {"host_name": "myhost"} + host_result.related_alerts = pd.DataFrame() + host_result.related_bookmarks = pd.DataFrame() + check.is_in("host_entity:", 
str(host_result)) + check.is_in("DataFrame:", str(host_result)) + check.is_in("host_entity", host_result.properties) + + html_doc = host_result._repr_html_() + _html_parser = etree.HTMLParser(recover=False) + elem_tree = etree.parse(StringIO(html_doc), _html_parser) + check.is_not_none(elem_tree) diff --git a/tests/test_read_modules.py b/tests/test_read_modules.py index c349b4f..517aca3 100644 --- a/tests/test_read_modules.py +++ b/tests/test_read_modules.py @@ -5,52 +5,50 @@ # -------------------------------------------------------------------------- """read_modules test class.""" from pathlib import Path -import unittest +import pytest_check as check from msticnb.read_modules import discover_modules, Notebooklet, find, nblts, nb_index from .unit_test_lib import TEST_DATA_PATH -class TestReadModules(unittest.TestCase): - """Unit test class.""" - - def test_read_modules(self): - """Test method.""" - nbklts = discover_modules() - self.assertGreaterEqual(len(list(nbklts.iter_classes())), 4) - - # pylint: disable=no-member - match, m_count = nblts.azsent.host.HostSummary.match_terms("host, linux, azure") - self.assertTrue(match) - self.assertEqual(m_count, 3) - - for key, value in nbklts.iter_classes(): - self.assertIsInstance(key, str) - self.assertTrue(issubclass(value, Notebooklet)) - - find_res = find("host windows azure") - self.assertGreater(len(find_res), 0) - not_found = find("monkey stew") - self.assertEqual(len(not_found), 0) - - def test_read_custom_path(self): - """Test method.""" - cust_nb_path = Path(TEST_DATA_PATH) / "custom_nb" - nbklts = discover_modules(nb_path=str(cust_nb_path)) - self.assertGreaterEqual(len(list(nbklts.iter_classes())), 5) - - # pylint: disable=no-member - match, m_count = nblts.custom_nb.host.CustomNB.match_terms("Custom") - self.assertTrue(match) - self.assertEqual(m_count, 1) - - for key, value in nbklts.iter_classes(): - self.assertIsInstance(key, str) - self.assertTrue(issubclass(value, Notebooklet)) - - find_res = 
find("banana") - self.assertEqual(len(find_res), 1) - find_res = find("<>") - self.assertEqual(len(find_res), 1) - self.assertEqual(find_res[0][0], "CustomNB") - self.assertIn("nblts.host.CustomNB", nb_index) +def test_read_modules(): + """Test method.""" + nbklts = discover_modules() + check.greater_equal(len(list(nbklts.iter_classes())), 4) + + # pylint: disable=no-member + match, m_count = nblts.azsent.host.HostSummary.match_terms("host, linux, azure") + check.is_true(match) + check.equal(m_count, 3) + + for key, value in nbklts.iter_classes(): + check.is_instance(key, str) + check.is_true(issubclass(value, Notebooklet)) + + find_res = find("host windows azure") + check.greater(len(find_res), 0) + not_found = find("monkey stew") + check.equal(len(not_found), 0) + + +def test_read_custom_path(): + """Test method.""" + cust_nb_path = Path(TEST_DATA_PATH) / "custom_nb" + nbklts = discover_modules(nb_path=str(cust_nb_path)) + check.greater_equal(len(list(nbklts.iter_classes())), 5) + + # pylint: disable=no-member + match, m_count = nblts.custom_nb.host.CustomNB.match_terms("Custom") + check.is_true(match) + check.equal(m_count, 1) + + for key, value in nbklts.iter_classes(): + check.is_instance(key, str) + check.is_true(issubclass(value, Notebooklet)) + + find_res = find("banana") + check.equal(len(find_res), 1) + find_res = find("<>") + check.equal(len(find_res), 1) + check.equal(find_res[0][0], "CustomNB") + check.is_in("nblts.host.CustomNB", nb_index) diff --git a/tests/testdata/az_net_df.pkl b/tests/testdata/az_net_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c7e1d6ae7d6a8d06bb4586fe6cc229483546e9d9 GIT binary patch literal 10735 zcmeHNX>i-t5teL8mUUR74xhOjCw3Cbq(oVgW5<>#TVWu{iYP0m!+<2jg-ikr09uw4 zH+B-+ae|yi4(Gn_`)GP3J(6DOm9}YmOn!ByGo9&7e{|ZJHow|^4+|QAXvdt+^hd|! 
z3_oDsx9`}uyYD@Kd#mpI)9MQTF#~x~NsD?%O4X!}DNW2tmbq|HG{i0Z`mnWnpS8zY zWmYHBD74Je+sdPh!F#Ehirmf0|nRZ}}g?0bvruiB0kEcGg;rCpNVkrZ`o zuP&iTGl`gjGE{(iv)Z|4EmzqMw=0EQe%9`1u8=ikQPad(Y%ifnDOJ%8t&lQM-K^$B z%Upot;*`UdP_f^NRwk{3*0{Aws1)AaZ$Z0Roi=9k5>uJCwHG-;rCmPXth~V*9UZ;o z-}vD_!bZbN9%7@Yosbk!F=Ae*TF8%bdp1tU zIcZ2zBuz9V)U1uFxqJalXPHZ5`j)IZ6BYADL6g$`#o9~v)Yr34yv zQCv%n6_Qyw6&ur;plMLj4Uc3At7llv#0^!$spwCo)PiC#Zq0<4;Yy41;%rtG)5DUI zF(9T4%o>uOzz($}MI!}qdAH;eE{YeDdPAp1_v=cnYMPIEfQYGTv&8xxr2` z8#hGFU}H@fmf6nwHJ;qt?hIF8Tr-d2EbvYuS+P%bLj%`=XAU zLDdX4iiY9Hn5G(PO3gBU!*GxLwux0dD5JAD<7uGAjRLn0v-hy9bCcDP?Rh~X-4r|R zHcXfg!!7A#l7-_^22I3E>6*){M`KY|UW3RB*#$4DnjB#T)srJ!j>yHeShXVYPPO;v z^XLI0>;DjX(u_FEsU_rGwP_gVdO*yIN=ni=(<9-W#lvb!%#KRNjH>N$S6ZsT*4Vr& zuSvQ-uA&D=Wi3^Z4PN=;5(Ql2j1mQ7lBCH>#xZGgR8&0v|>~>Ii#WlhndS~B}@Xv*6Te%%$@XTJg{LhrV|g$EHqb4 z>{zeK?SA1z2GfIE(#a&Xe7?991O4eVo53+nnvyXKGwU_*;t4Tn=bI)Eg%RpUrCkPQ zCOhsL>8{uVJB@8tSOsa8y=pN5VHb*>r=7*F!Z8f&aZ$-|f2jAw zZrzwBtD0=gve~F(Y0~LRyPcNjR2;D7o;uH%8&Lynl5wt8HOo8SW{)X0wnf!uL@kX@ z%Z9hy9b0~#hLQd#wa><1jroIN3-);@#%*kj3MCWHsojY2SqqHoh z98;^kL^@+*sA2hrT{JO2$2q)Ak~6?pjiofa5;?GR5zlGMoql0oapsa)Q8t)-1($o9qM*dm)@?c_ zPu5M+)R|7eOXqprC*!?oMz?cXbe3|ux7~ydvyJ?Hi>BfRK}plrrPcO$pR}2R$X2)W zhM(fxA8jJw6>+r|cW63Jv(VPz1EOm_5SAzv$#6}9N5VNy02D*bZiYJSiIp}IIx&G+ zgju4om?7+)N-Jh>On*uy=8nf!XE2^B7dcXvxp=m7?`oFQ3XHOyGTW44r7YxlmRJ$T zZJC^scwt)75c85|$aavPHMewb74`8Qg+a*>Wj>BNzUKDFM_E4>vY4XlM&EkQQe95e zW;vgC9IddNgj~F}2)Vt?q3pv|1M#do*s+eU-L9h?d_HPc z>+Fz#r!BKeH`Kf(RI&?+UGO?3JY_)}EMZ4BSmqqqDuv1e7My0zONQMCH8{(AOsinW zv}k38g*dkoYyvAelb_?x?(R&6#bhKL>|NU@1ta0E-e8{;2?aaFNTj=GD%6*p3gObt z9oRA}EcgPx7-c{It8dB1%s?m<3=Iam`h(#}aIilZjacStQI`St#cEDxTLwKZ;(kA~ zCWo6W+}mTo>oc+v4k|j^28Kf&p^mPOZd~Rgp^l!l9g(h%a3|KXu5>LWr8_ZYc6Ilz zNlQ~A9>pzlWl5U93l<+!$$`18igWs<9L7H;$*c@p=INo%U^pD?42AoG%xG(ao#9|# zxTmix(wFG&#Su4R6dE9j=#jdNk2Y4F&>(s{J8?h3vmA-1N7G2i~V;hHvv4p$YRg1WK%FxcOrnHTbH?*1XT1{pK+h zEK*nnE8!eC7goV}a6ViBLFj-r5P~jH61RG%!^uqu|VGuUM78rtyAqJO# 
z0K+f>qp%gm;8GZeI84Acn1svVa<~Gngsb3c$N+#0)35`wkOKu&$isD@fer=~U?f@;RJjVXMA$;uPPmwG3E?ur z<%BB;Pa!;&@HE2H3C|!rlkhCUuMnP1xRUT3!gC2%5z>D=D$b+(^9e5?3=(z_t|1H& zh6y_fy9gtM-Gplidk6;zqlANmn+dlN4iR2V7$dxdP#_#893dPf+)6k`cq!pHVVrOq z;UwW@gqIUuNq80E)r8v#uOUnjUP~wvCJ9r7X+nu`iZDY6gfihY;SRzap-Px1)ChG# zgRnrjlW>M`7vU`7Zo=ycZy?-5_*KFi3HK82BfN?5X2M$t_Y)orz-cNXh@FBv72_GYTobV~aX9%Aqe2(yW!WRf%Bz%eRWx`hp zUn6{-@D0K@3Ev`ooA4b%_AsiL_of>#P4Uz>yWKmoXu1^D|!dGO|L>&v?_zPz2S_c+bnYPSE^}54?;!g}eNWkGza~ zgnRvrkG+fsga`eMPrQr=g(H5(XI{o5!lQo1=U&F6!ef5MAH9qxgeU!sKYJNZ3QzeN zfAun+7M}4l{_bVGBD_irsi0#TF{={BY#!eVvOh^Wf8R{x@Qp41*W`AqknH#mlkd&+ literal 0 HcmV?d00001 diff --git a/tests/testdata/az_net_if_df.pkl b/tests/testdata/az_net_if_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d2193951443d39568c359a4263421751fd9bf401 GIT binary patch literal 18625 zcmeI42Xq|OxyL12l4V=6EO(n`i~$>vtma0S<${nU3rQAWFk*Ihq;F+uR_xB&mcd|( z4ThyJz4zXdKzafp71ALg0YV_Xk)A*bA@{rUzjtT$j$Cqh=j6Tf#^?NhySMjyzq=!E zE4uEP6AI*$sT{JLn3bxICX#k_Z_*mHvzf8WE!SEpKVO@je|~mrc3!5aXRt4s&Ft?F z9(POsq~`s`#Xn<-m1kce(*^}c6i^m*$={29TBF?+L}s*YGGk=?i+$KKS_ z><)q#aE-DL0vpx*~d}SO&~y z#)xXkAlGJF3btpP3M1KzvmM!aErqR@6>iVc)J$>A9Uihd&Zsp@vO6#FyhmpW&&js6 zwVn9Bc%-x(=Vl{2xe}#awqrSNb4=vhnMfz2cIR-)wFgBCt*f$`$zHVwZO2`eOr(d} zWNVgp#Ru(GwqqwP*A_8Ln-YUVY0+|`=G}v-6o={&EjMOe!m(5QX(m@G8B6sv#$ri3 zm1ZcA>^DXEbtFU@T~=g3MxL&m15r4_WaZOlZ+1^e3^lj2>(o5gF3}R~et>dc zIgr@2I?>nZCX%9O8>7)g+HpCA&Z#-YVOBU?gji1`o0+KMiqN9efozuBsc14jFdJxN7tPsS6;xI5e<8eKbfqz7zO z+5J>VuP==P=zj4019{I=aT|w*2I5g!gH@t~H(A5Wtmp>YiM1yZ1F{kJRYASBYHnYd zNN%!{vAEO6&8FL8b;61*vj!|DYA5s4(B0nJ*o1O&JC>`Y+d5bAcb%1}`$kck=5_@3 z2G2e!czZe`3Z3Vx^XN=R9NXQLNN(UXXQ)Ws@uZu!23oBs#U0rJI=ki+JY{Qe4Y+2y ze{@EH^t^0B2luj7?Y@Df2P+y_^`EYNXUkenSy!MtkfE+M${4JkZvO5er)eNA=5Amd zcnvp8wLw$DalE0{=~`}D_OEWBoTn<`Y!z=sA5Y|E!>*m`N?J~;*G?wItPvwq&p1@h zOyoe7NV79yDtaidPDHJNHm~=PA~($hl}5X5C)q(yV_rwZNIcn&_mRs5(q%?-1rw{~ 
z$h!_w$3b+ns`@y6!Sp!L!Kzdp_F%%bg8_V{*KHIG(P8(AbwieDmmy?lXBq$?3`EUjWq%s#J~cr6n6G&rLRvZUrx#Z)k-2$T+$I&>4F13Kk0u5Li=m z)524&8Dh=nme(tmUdNMiZs{Dw)+x3Or?1(+&gFb{NBP$JX{seu6Y_o(*xTBZ2{#e- zN|2j>@nkd|cZJ`^cuGtcktr`-a{bmLy3j7M!5C}t-GhwP*q_ZGjyRK{i~GfBPX zcWy{8w_PhP+e2@^ja$-5yRoy4%dNYS*Ce>~GMM$;T-(4qG;>{ior*7e-R(h7 z6re)qpn6T8v%1Aup;>MsxiKCkd(i3@M{%6AbltiVLy3Vz-*A}U@^~tm*k~t*y=lYE zr8f+9Jk`18!srp%VW;G2SoN0fSuq)vn+`18a%aIl-%iBmWHIvi>?pQI*Ec}01%-7Z zPPOc1HnPuC7eBYr=|x~F#12l7N}8aDDK`S(CdbZ5^ft9ZR+15_1=vvnhhB-YNNPI8_Adj zp|+B-^ag7#ducaeWNnSHZ`x4C$T4gwV_Cu~sSe|?sf?P1+Ezx1u#|hNfV4i0^(QYQ zBMIg2rNPs|Xq#XVnT@e{_%|4}GUzj;>Rd06pwSxilMQdQsu#~UWOo@treQQF20_qP zGpd0G4QvWV7b1{4j=^>_T3+w7p1Ea|DAblSLxtIN`ifJvp6ViH+ZksVaX{8Kp1JPq z!qzipsAOs^Yf=jUgt@D9=tbI)wdvwxQAaFyjI?qS00e z6l$&+lM8`;Mw>CY{6S#483Rrmc!8oFq1sqc2MaBnQC%I}S3G0*1p}dg+5*x!D;v|W z19G;eIcM{2PNQu?Y)>Ohhz)9N^vV`BN|&=qjTx11n;IicEJ|vjGa?JWfURoO-LP4W z@4G(6?rigvD_$i#<$o$#|eQp z6@{o=+1Qysf@H4E{PW^QYe1Z0lP)(j-w90R@+hAbPE5IvV|Ue$75pU-t(;wa!N-W2 zd%uL0a+ST0CEcywhjFqQaJ^Qxy1~EbmaSgw|0=Tz*ra919KQ~5d#{eC{jV$lY+PZ|}rs>0aE{+{7JPA}FKs2kBGP-O3)t-L2AHP&${2q#aup zYN~0i%Ag_cGHQkYh~Sme?;^uY3pu*WpkW?fj1`-Q_$Ly1b<*VRV_u994;f>2h)3Se zFi{=E6OLLsDwUHsn~SL_dtz)8|G1eP=7uVlOLJ~LnJ5pkcC6XqZk5AYOcEy^aH&Kf zv8?J)6TMJ=r}6A5^WxbtvDt};d}HnWbD<~*--wFP`EC;sOwXzc6-c{?Y7Pz!*y7h^ zo%{H}B+i=;TulzC=7acNiDinNM;MUesxEIp3YSvZU$PY|)j7&%WS0eR!YTqmxVgs% zZo10xO_#v9pDNAWN0p07>iMmvi zPSUu05kH@X-?WU?hiEZoy(IkiC}qKWlk&WKl49J8lm+iWCL{A;k!J1co@0`lx&C_% z-waoSX2!_(7Sl((r2lo)`z>^;^ih48`wes-=^o4~_uJ>>P&>Xd7+kLEd2X^a6HU9k z0wu`pR#PzAm7xT?d$Kp)^IRul3VG4JT{l07`ku?8GUz0uP4Z7nHPXg9=mv|}fszjT{zBf2ac5{k7xIl=k4aa&>6q|&4Qzhm;amJ zM0olPqe{P%*hzWYouZASfL*q)b`DK^lJ@o`KRM`#494hRTuXCyRE2Tstqie~Dnsm| z5_Om<&7D)q{C18w<%=PxhH3D`&D*$PwT#8~&%;eMn$O_KRn_NMVOSldx%=ixB7eO* ztKt@MiNv1Yqsl5?q_f4lb(tcE-;`@h?+`EOQF&%&*t>#a4eH~U1v{9&7XBO31d1&J?BG#89@Sh(FlkL!nI8=J)sZ^(QRf4op>x*9&b|7#E`E<>I|Z`MD7< z4&vR%;vV@9B2A&`{3;>M#EXP2{2t-{;@!X8+k-`!!lU!v9xM=VF*c2IQlcdM&&}@0 
z^818Mr3J^o_l8rRn8sf-#XY@A`>bqRrliMvEiu>%(oGcB6)ng+jw)%_y5;QTy=W<4 zmrOfxVJ2~YnNjYRY^G>vSfXcbR}%;8$fhIdNdL2<)x7HUKQBIp)~){q;UfyV?djWZ zZbL!)U*hKOe_3e%D?*UQtNi^nme*O{6vEkrB->KBLpEBZHuuUU%?ZA6Ss)vzuz*{} zOrnI7K-p@%*H{a-?J2{PT%=0Xo9oKa{V$4IgcNMqlfvD5-aYUC$u02T-wvVu-nv>$ zNHH7_lyCRWU04^XwIU6*RZA8vwW}Ix>z7n5wHs=x>a2!_g^PP@mPUGOgefgIiEO4I z+g~dwhuhCFGb(Sy~<|9z+&uw;A!SE`yWg?m&x9L+Ph-yNoZdf-^aZ#Sy+t! zqh0iYw^LZ#{qJ%651J!WSyNM0v%IRlv8uMAYI$Q-Q$sd0-%7Z@w%tLkd%YO89O zR4o)6P@T}aMGbYeJ#|YLE*5_)Vp}P3zg(Bi%-f^(Tv8{3mUxlG2_L6@d5zvH#(?N; zWqM>Ya~o=6(dd#$Z&gjh;w4oLz4ei*NZsOvRd%e#TH4TC7pBouf5qg@Qp^wtX=;QPW`XqgtK1`Q=*RRE`YHX4eo4QgU(;{tcl1a4GyR4BO7GCS^f&rD{e#}4f6@E>1>9?eETdRP zvlOuuvy`xmWf{j(&N7jul4TOh6qczh(^#gn%wUQq8h}rG}-JrHzCsk!2Z66U%az z6)Y=RRsU@>Ih|!a%NZ;^ zEN8M1837ewGa^gDeh9f@O&1ES4loip6C~vutG9#Il)Xm}Lvg*(~R< zY-Kr@3%E=i$4O67l15TKM!oC(_82ia&7e5&q)hq%@L_i$5{t zJCx#-_(M6qUuh|7DQ+p`o1e6Z+kYCHes6IBPUlZOLRv22g58VY^^cE zyraUH+aPZ;;)*H02lo<9H3QylM8PZI&%mF9zXX2;vQeC?&3hx4LhMid2~owJ^6p~_ zq8|ZPgSB8AcoKLrcnWwrxE?$cw7@7h1SUaTwiMie?>B+Bf*%Jz1%3{E9{dvcW$;Dt ztKiqcAAvsxe+y0!4a83cI1#J_d1d5hGRTW0KfF%z!%HPUyjt>e7|06~KfEmQ!|M@0 zyddz?2l5iY53d6J@XY6jCqF+t>-ph{&ks*~euCXRM)ac`w8mhQJ2U$E%yC{^RwPp( zBFbkpSPYhcW57~yEU1zikMCt*IXD5V04IW#;3RM|I2D`*P6uazGr?KlKH$FKe&GJ# z0pM)#KyVIt5I7e+7(4_#6r2an2M+@e2dlsZpt`)Q!}ofy0bB?!0vCfzz@^}k;8Ebw z;4$E_;BnybU?aE;Yyy{qE5McDDiHs7KtVIUp9r>qtHD-q4cHELfSq6$xEAaNPX*V3 zr-5gHJzxZkfi~C+_JIV(!G3T9I0!mm0z3;$fi9Q^H-ekM&EPP&1w0!(2iyvt3vL6? 
z2QL6G1TO-&gFC>B!AroM;HBVYUK?Pk|o-KMZ~Zd>VWP{3!S_ z@Dt!C!B2yq0Y3{q3qA*a9{d9MMer-&3*gtlZ-CzfzXg69dxCg^xvuyRhPH~IudDwJ$bisx literal 0 HcmV?d00001 diff --git a/tests/testdata/azure_activity_df.pkl b/tests/testdata/azure_activity_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5b9694aac6a430767aea305cc73cbf9346774d3c GIT binary patch literal 89507 zcmeHQYm6nwRi3qX*Sq#kY$vvpHAy^KEwIM!neOW9s_GEPtjCGHnRxAZHxCHTP_N#; z{6UH^AR#~jDN>{WLPCN_JbsXv zuX_57IVn^PKOTsy?Ty`oz{(-Zi#mEd2FvU(!vdJCnB6_LD_hFZw}$ z=YzVdKUf{VJh=DMgXahP`djA~kF*E4c%_*9%{ei|qYvRl-64CUmPx^73GI?1jP17Bl$l;PJt}*)6ks zUKo^j_P4q2@g*O!-0-LxoP25d?v4E|&kYV9JouqE@K^oJzP~##Cqo~`A6@co-EFj* z2P;0kb=~>L^g<84+4f<7%r6Z3Z`p6Vjbn}O@k3qR?SV6ITYsTWe*4PRp6S#n?>ggJ zop}6l-)Z&Q)<4v4^_Eb_A!M@c#!_h00XOK6&$QZYzfiR?oNu4e3kx1{yLI{Cz|wx_ z+P;J4?3mGAf27q0566dxbv3sm46B=9^tqyV&S- zQ0d{uA`Ph*)ZPL%_9Nij|aL2 z*yY0q``f^}b{F{UZ|!tjOM_V!4m=LFRAqXguDqjsdjN2`dkEi&S@zn&W#de`K)OJ>aOHJj|050CZgpBkck+<8 zy*6fp2b!J6k+R#F3e(z|R(#`Lk(9Q3IS;s@b_^o8sP z*+Lo{Xso2U1I?vqUP$xg&Gg(bexO;j+KZKAS;MzZ=fn@f!>T^qgcwu13VmGeqn(F( z)_U(kSmjC|U+%WAmUpFFzP9|;S}*s}+FCC3_;R;pwY&>$$)%)Ut@UyrT}rDiH1FkZ z`)YX?+LE>D#|HC<@lL=9KmEHOot@YhV#E`$JBV#h%(R?;?*xnEdk354<_LURO|n^- za^8NfvSWm!^OkfiZjR9A)g*YMWH!83GSbg#wh@j-EwoBELV2r6&Rz12ot7Q+uhQ?K zM`uvi)%m>w{o3mir=-kXlqx49Uc@5G6KA5_8mpX-6DbP3k3q?ZA%qap1u_m~Eoof1 zY6GQ?m1LRbk+QCccw8hWH(#M5f?2Qa<+HTjoWPgeN0Pwn-Us$7B0H z`r(<6PA{&!G-+E{zCo9-3!dE7(arfzsms&lLpL@GOLVh!%+KrQ84{!$m(G%LB_@+h zO(vX8N`-e6=zKjtxSUMDo>)w>Gy{L=d>ub_1-vjm$=GC;V>#l+KYh!7x7cVlurwDh z?i|A^M7luyAUKdVkTD=zc%6(5&p$hU^Tfm+yU@U*yL!um18&cIC-#Uq$z+@sQ6>u> zaczr8DPtqy9G6@Q8QXZz{cCcs?5z-TubceU$re5KN2tnv?9oGq_l)2C{Gd2 zbI*U)MsLnD{h{j5?tCNW6ZA;&$n22Oz1&02C%RbVyFBP&pXz3Jxb<^zxvr1N)Wa6i zVB5lQd#bZ$w`pw1Ho{7`srK3pt2PdnmnjCGOaaBJvL=qPNcVm-w9OV|gK zIDb~XIUUD@`PJ?5$=Z;eKV3h_&f{|(G{XAYPSV_Fb*E_>VQGYY)3w+%pRYFXSf0=S z=E{>4KI4LIkisrV;fdhU*;<$IQI=!)0?XRhnwq_Zh2@vV=;`Z1_rkw43q{ge^Spif zyr^a^zU6eCJS&s5NLux}ks6ycSG!zuHPS88Ez+%XeQ}3o8P)fSHibu?EFoD!vV>R0 
z5?<3dBlZtHt2gIOtlCV7GY+4P4xJ-bIlMn~qh5E|(|K8rEjsC=W;{2Ksm8{L$ z{SL0gb3EDkYWeIn@Ff%HiSrlrMOzwS&pI=uS>@Ud;ZKhnG z7D?;u->ARtJR4*Q*F0Tb_cI2P7D?+0de>>(qH&AHtz%c@U1riP(ya}mTh}z^a;~+A zZ{qo<^^CNVv@$TXk`(KDH_|Mbtj(s3@RFUc%+)oBGZN>C^P4TsNSG(gUjxi*%`7ky|(oawOhTVtGu~4S>PWC3w%{yYu-?pAC$7u4B44_ zrgv!6?WYJcgc-ujnNJ57x0xGh6KNA^6KT`v&)Q^XTIcwv5OIb$bFAUkFY$U44>@6m zFhiIr$O|DOUf^@dTZH>gj+ub1t7AafUb}W$vO>IkGdRW800T@GkZS z_^3Bo;2#M83|U~Zz+{1Gt%_g7cl|n3^-`eaFQd{bo};}+n>eGpdZyKEdfRQaJ8$xl zq799i{_kzD^=boZ@w#oR-o&SA!)K*B7x)Gz@%V!L#>C_6^jVg0OgKJsT`!@6P(i4; z7Vol?=8@)IDb2f@Z;@Xf98-(WxBV{h_=0T3MZLR1JSHBKJs!@4$R3kD7T5IcC$h(6 zkI5cyTzh;q-z>O1wt`xG<;OhYG4c4!c?F^3yr_7k{lr-HN8w%y_nxseD-C`ZG*R9( z0qVU{#5_@%i<~rZ#AB@@mB%_N3X=)rnbaa1^!KtPw=6MP#EexD&r==g*cQl@CtNcj zt;mtg<$`6omyyybkGNzy%4K1r%onjt^9+|wC{1$Cl<-mExrjJdEK*Kqkxh-zKF(yq zLbA;3R6C>)oa4=J!F`k$9vN(!8Sjcb%TpvnVTFi&5(yu(h`S<=3`T_|{@c#PFAne{RSM{E`tGGb|?@@+IJjM&prML(LbMwG&+d>c*lPR2#1 zQx?T(KuBy&q?m(DlRVc#G0z!;6tvI=F!xdFa>z7OS)@}bBgRrDGm#aki>qWVF$|(N z$`=t2*wi+40hT<@atOchNS4^t#u*2EM4SMOS*Xui091Po99t(PlA+|xXE~4J#09(o zCfXJziUkucHL>Kq_XyhBwSOCeOs ze5A8HE3!oL!UT}#(g-F^0zD1MGN`i#C!!*gEaxsMWEy(P60QnibtK|AN3u{FS2D8R zJ5%Ibx~iumc41^GjKtm%g*9SNM`Z7a3>i@hBO=7G%ob7tA<_W7GT7csq!G(gE&vIW zWWl}xYMJnkMK0Gm;>xB#uTW8z2${!5B+duXd!%MXmPN`+z|sInTI4d4nPm{X1{Q+h zQVJ-I^CZe0HxRuyk#oY9#X5Ybio9BMPUG zB(esh0T&oUwANVplky}kI1sEurB=%n8V41%8a-taYG4hI3ghzBr%X#8hK~{1J0jmk zWXP!O9f@yuKhm)0SV1!A)6EVs0b64e6Nm}RTfh(!t`!peJ~GE-iK;IZkP==iB_tIj z6(kir(4@)0abiNFOH61J6V`v3dZW!v_QG3a9Z&Np5josFc?{RShDVkqX$ogQh~V(_ zWN@}C<>60q1srJ%j!hlMMTCGwA&P=A1T2t@7m1f#!AA_26TDEty(9#DrMXZVPN+0z z!I1`UStvh^lozNp(Gp%(c$QO^Fvm1J`zH8MeVQ`E5gt$i9$@&};30!c4jwC?DpfF* zr@_Bl7=yy$N=D=1299I+jSHSe3H-oCo~F{~RY)YUiPa7b&n!H$hO){LJN@vU#Ua>rc9b&0su!jYo9bASY6_GJ$s)x@%$B{5` z@QWj`A(&z&@+smXoEcP_=ZKIbiOEyAkz*Z_S*=nCB135icyIxKn?fOx&x*v@tl%~d zQb=>Dyn|OSl^Q)&Xg%D7K5{OP;Zl|<;xU{<<`L&FA-aT3lwdGA27-|e3U#*^Ej(~_Hg_$5&Ys=5Phs500gVh8P5`gc4X>8 z+qle)%8iaZ^I%9EyzDmkDP!ki^g_e8k7T*Wi8FApG6wg#N3(QNply(Bh$iuzRnB-J 
zQZM0{gdaY5J%P78%~J%RGNz=16SoQ_ah||Gnn;AXVw7g!Z`FnIk(F@6TO$l}!6Tb% z7i*Cqww5bM!3vZn5DUl=QbBzPmE}PSEJj=hK3rfH0^lAlV{PEQjdNe5+N45Mj$8Op zr4(p5SP{?*H876DISNq=;WSHmV2DlAtk4FU6$k$={H~EH;KGHZZ0wEG2!SA(@zyaF zr%*e@MYw_T7%mXdOV9wX4FnXD`53b0kWQQj#x*5Frrr>4&$9R3O?q@ zLzNKsLo`IelMLAb5y(k}+_Rry)q=(GPAAm^4Kk)EN-~YqN}3ZF32@C=r$Q9^B%Hej;9q zn2k?EJTwHHk|eYZL01Wji2hk1q)^Z-1yNX=Yvj=;t3pZ82!tnt8AQx;xeIxL41pP$ zRH;QU6s1|%p9G9VOafc`@hk*49cm8nOY#I(tqO!nVU)@%5d|}-AX9M0MG`4;M3!SP z5T$Vg&wK$nX%m7Nu#tciVs^|zEgy0`orT}I%5gLF1VE~dukdHAyE!RN`kT|*!P7~NErcvp#iRLCwGsFXP z4{^do1HFZV-e4$WP+JDRqBNBv)+k^N3W0*S3k8J$dw?Ng(=eQsi3Z0L<0F1Sg6AlZw#I+sN;&!Y4hzap{m!CZn&cE{Pw>|}W zYW%-G*$C&~Ze=>$|EZ~Omut?4`G@5V(+}$zmOCtOCI8octE%VJ*Z+0*Z=k#rUwdrq zmqPg`{-X26Q2y+9{_<7OQ*V6eUB4QxPk(p%*FjG_{pa8KwLrhQW$H_TZWFJD`@?j@ z@`mM{`rces?lApIId#5adBgg~Uiqt?e-g_3gMa$QmqDkuJ@S>;*3hxv-g)4wI3N4> z`_vzWbl-XC*6RFEr|3@7?>2T=l+^fs9{JV;d{myqPdSYtp(QtoAPaS<* zbw2jxJaZ$?)^J@bmNd2@>~D4EB}d z<*hIbd2jh+<$dM-WmHaq+5FCQpp$_L92lphQ>uYCAm??sra^VGMcuCL7W1!nlkwHf^LgZ}n- zGU1a+Wi_|zr=cI^&He3Wp=F=!4A3L=xWE0FUg(88AMfv2Jkp-SO`M!zTLTwhTN|sk z^~U+*VBl%S6)isbz!*&F@Nal{Lw(`Knr^Krim3j!*Yu{E?x^X`n%-Q~TWY$irsFkz zOHFUB>FqUrYfax)(>rQDz1ij+)+G(;uqoJ8Sx`n!dZH_tbQvrthg~fME~( zoqsJrtrdR0j-UPx(`zhr@n$3zK6Dlu*2D75-Zp#t?3OTMt>*Z`Fvt8ia5TGn_U<}` t?{!dgf9o8q+n~QQ424BoFTyPDoLlr=tkB2U_ykJV-EO-B(f7>c{{x04QR@Hz literal 0 HcmV?d00001 diff --git a/tests/testdata/host_hb_df.pkl b/tests/testdata/host_hb_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0f60081ead03bcfa00c0e78f1c84777d94ba7c60 GIT binary patch literal 2767 zcmbtWPjlNu6t@#6P1B@F+J^oCIwcbzh02y}C9$EUb)7af#z`hlS}w)2lGa{POY-cl z5(0FF9$F})TbED3oh#Qe(+|K52hMyFcq^@fn*^o|%EWK??Y=+z+xOnSZ*o7p7|+CC zG}A}EgIL}UD9(2%^00`GuON@+0w3EJP!12bdzdIJn>SIi^19vAl}h0^_2DS{wOCtQ`}j}z;-AKKF07~W6k}W1M?SARV7L{8)W)qn#<3@& z$t~BztJudBaSY2t%~cVdPH^zB&sS*>_Sa&3rxVFq;PpcQP%0bWpKOR|ES<<9XzK

(59nP0-VJ;aNrvkWFpmAb)T&jx;6{xg-^V=>ovO1{(7lWH z>Hr?<^`z4?o7f9Du5Wx2_+6KW4w#$X=OM0GQz@$!gg%$foos~`v#HzX;D|iX@8>F|g?*IqvBcja&YXO9Nkm2{GCVdZ9E1Avb!rxxHOq z5m7FYIMGfufI19U7v+2j{95<~0)R|L=Z}g-iu|q&#!X2F0+6h%MI(R^j{h)u%~%;72VPB3h`f*R@wuKVMubq2+Sf(v}vBOPXmr zwpPYYMRS%bre5q|6Cvo_FFIG6xfkIy;8!H@gqyWjpL)2>`o47uP9S4z|IJ zLXJF}W7rO<%lGE4T$;0?q;&BXMfL;N?@Er3`EEcTGy?!Lpo+;b-EnQ)M18r%@mUpK zjy>+$OwLntN>alV@gqWMkSLK;GLA z0Xr%9De_*5yvG0q6vr?e&WKCazvO*$u$tt^rw8&L-ptw%ltp;w;E#(ZvI0Drt+pL_%^=gt#!Co?R*wheIvA8ynMY1FdDAYhzJO5E8t7M1?z`lyWg?%G$lXu7h zxlG<8?~^M;BY9FFIx$F*l*lqElM4BOd`Pa6Yvdzxo!lT*a`V4B_3ofk_#tFGWWSj= z;@&2QpcfHjJQ}rR@4^IBb3i5<-9f!juC$^tuS?so2qj_63SDSFO^@f=%H`JQXaTRcq>5v(dCWTPBz4`nlt7 z+jhkFJ_GW12L%oDg4xVzY7^8=EY!^B7LL}S4+N}RI-BsZqu2mr?9Pr_ySlvMnptfx zwJK_(p{{_wg@&ouYt2@pt+znDv&mt!J2!({4$G_d%*ac2af4=m#-6kH&TN>GU$U(o zolQn-W!Xd`8&Amgm6zSon>rhQ*`A%9ed0a%b3SGsZ`s-vjZV%bbDzj&@*(S-&e8DgpX0vHR#J8dLd|RuTt>bF* z9Q53QdP-(lg*Y7sndj6M=tXj(Pi_ks>T8abYE5JeaPBOb%O<3wrmr||orcVUen`TJ zW#oIMc6NTH*)prsYPGTiNw_QA53ah7-FDJiX;yS~>9|>~H&%{M+MUtjavPM40hwPm zQ5vR?Aa*yXL7g04h{YR@X(N4CtrApI6HoDe0 zy(h|LvjGl3s(Y;S=h|4a!_>>A!&6+Uw~cueCEawb{!pYJs$r?VQib}qJG&yIbM+-p zGJedBbrkg|%vrUjJ1KQtJ6)ZJLNVt}Z&J8;z22xDy)m2)6tu9MUz2Nx=kT zsSUO4l*oOKcX6X`Y{pKRHPxNwTjrhgyYm2w+I91m1)m`%tQ(!t8k+sH?aQ$CSX(>0 z;+DaNT008~o|r?_xAs}j&UQ9Ilo~Bq!aAePR(;u?VbOsJ2OAtr6BZ`xPU|kT##y`Y z3U|{Cd)c<`?`*6DR<=h&E87TK*(U#RbY$ecZSMJCNCzVh8(lL-hSUwj^^U{k3D=JQ z9CPWJahE>hQ9K@|tMOqxOff%hhw-q!q;cGj`7p&ecs$(3{a8O9UyWk9Fkcu4^I?kl zF(2*^+l%?|IOexu<%%wQoU`mb;9RWTI)9=_O+Q7=%|AVV?8ITXNGaB9t$MRw(sSNX z&ROo^Qlns0E47n48yPrV(OZ@3$%w=R1}~k2b=zwcr>5a51CL;}YX^_VOLil9B+-T) zp66l5u}gCi|KPGLJ|xN7F<87mNhTyJN*oC2NrH{!Cy6oHpTuZl0|HTtHXQl=@CDc+PMN@GlwVkL!@ zM1@ra87>R=C@L!`W?82UP0=V`mt{)RxH4s`8ZFUfRgyU!v6-?dnzByGipEpCEGm?& z2|5U{a+%h2oiB61Ru)B0)?}7aWL2PefoFlu)G1NZX+@IClERgNjb{X!HGqRjJ6L>v5JLw2drT4^=6$ zbO9q-!Vn|TvJZ@8D-$r1)iFXOTEu{n-0RGpxdt{8osPll>{Ccqjn^cF#MMLWb@rJd z4iB+2=D~Ymcq!F5aZo%;@~zQ{#QL5;;)u^U#UgE9f*LX1H^yl7?8Ca*vn$OO zpTT~Vz&9oldwak>ao=G;y2qS? 
zc=fjF5js!P@TAj_4n!PoKJPxJ1DAIET7|@<^*ms zT&PVuZ7eJXT2i3p0+S-e$%c#yHItEOyuqam|;!fsYDbkM`Jma8Tx7g7=R@xNd~M^LqS*Lp?c8qVa0|7y$fT7W!*S! zWd$LJ|8ui6>=JUrNDONNmi_^q3VXX}D7dqYXMf-j9v$tSw?YeMY_o>~r#vEjBBpdB zh#-iV()Fw|+Q0AVfJ7D(F{SGXjjjc8Eixgs;btJ3zUoAmQh_lt-2stQU1WIOQ@S$^ z)*=%!A_IsJ;#$HKbSn&aciEKbe4b_5JTI5a`LfCL`Le*5ovXoJ?x?Jng*>aWW}f9F zHZLlwdqLq13p&T=4Ta_Myr8Lh-89^jq=#+L44-GyD;=R6t5DmRfdzy#?dx-HLpq>o!4kp$ZL`q zNXEM2nVza7yEBNGkl<8$2@#DZRM0BX#6G3YizKVpH*GxWq(rW7n8>eHR43KJDwbi@ z@e|7vPAYyZoSKhS3`K3U9rYk816#u8GMuHF4% zCwQ6AonncQXQsL_=>?X=dDm`)9f)9R8v64RVFwIMRheK(L}d>ofZcOGA(sK}R4*g( z?qLT4yVmZ2fe9c92BvbSgeow1dLRKy5>zI{+wCS*nFw{Vm3(&T+NDG@-Z8AR{iq4pb> zWNhz_bPO)`G|jSrwjf%t6$T3k#>Fhakm@mp4m+9Dj?)q_E?<2DXWFa>c6LVyA9 z($NB}@j(rHip>g&_KLc*MajVp11ZO_5(78(hRtC#Zs20rv{b3JFTmZ>=PEU${v7ND zk$!~vF^%KKW0=PAr1zys;xSD7MIw{y7d4JQswvvjlS>=xarKVU`1c<}S=}w<_#90! z=%;Kd+CAgI?kBZ&t<~Pm$T^zN3A>M-nzuU#o^n&a8|}4VHg>Z#BkuOLrr6Dhw6t4m zSC)+349_X>W4i}2JG!mf<5Oq_^U0jp2VabOr3Wk19EO!71RHiuNpmgCoEnF$9SbYZ z&eE8WrDd#7_N?O&qdv0)liC_%Lt4bHUFt{;&YG_;k4Tk=K{J_NeWr@k))?5dD!jJr zeFbj)rlgg6pXg!BQx`ST+=M|>>nO(HH2>CWLne-qDktTO@jb1G!*P9IecEg}JAT=n z;lcU;pFtRbU*XGKL7pm3Gt@LIqaW+@GNmX5g%V0!VTvzK6(?yC*oquCDNk`MRbXh? 
zSwSe#)FdwzC|W4eoWf7aLV*W1k>QI%3GL3nmrB4U7pVfL6ey)Av+SfeNf%iG*tnv^ z6-zKkvos6)0!Tbnm@bs4k|>o}VR~wcE{OKx(M=;G&PTTD{kVkQ6S5HosW8b%B}q{z zhMg=>d_mx;$zp-0L~*hp$l_E%C`o+;H|tU-s$s%X zCuH;7CDcrm?EW+p6}V?5q56LEOr*ZEhX|5*&Jabrzd|JP!-Et!L1i?Vqa>aeVW~8D zO5p{85@6FDMKE<)HPCiDg24#_>|H}~ssTH#@T^E_Y?+~SiH9F*>Y}QvXnmI8fe%ep zc}myIDy;SrP05BXP$ny?w4xd&t-@ELdvt~28Bhqj=SVW_#iKBkECFPhPAhDdWiyimiLW|1g8gjpnH^V}uWOz18Vnx~m)8e(3uX!nz6BK4g;M3BUD zrl0r_Y9{i--0KE%+vG8>P=M{kXcl%K6JUR_TjxqsJX2yz#VK)0Ec!YM5+ia`u={ao z{x~he_qMbGGQ8qnOIMY4)F$aBu*S#2(qMFwIXyqRW`|vkH^oEkI)cT+9T6W~!G!LP z&+eo^N8%cdbhO9Q7fwlxz6iW4+V#uJcf}fToSQ@N8`dF(n`GBN4tos7kR?emC=NEIr1&x|QVL^el%i|83>V>yB%@e`4;`(b=g}@OYuq?4tDvzCP*onMx+TR?oudAx|U2*B~jO9QB$(U7>_VEKALI zggb~G9uBuxiAtC~+zy%ECtCPXUjMV#!>#0;ouPVUN6#O_8_L|5Ttpv<_YL_T zi}r^TKM{={Z0@2)8^-_2d2~OJ?4CPG;ysR-kOYhm#m8sDxDIc< zYN+srD7c+&s?ApXfuAkuI4VxgxgOayXz`15RmXOAN4p1pwtkOS1&L+oc6JZ^db^#` z13#PpILf(!wimy;8NQquetbY;0Jr8PO`s)Jq*TFxmx}OZ1-=VWbc*3jLDof~4FACw zB7&gP5^r*pA)9bHsKJXnWQ9g|C{>;@Seb^`egK=q862mvGR2j32HgzRC|NdCN?_q} z6V~L)@Fo#pWB9Tv)3iaEiiTbT0xvO<;YKMX!m}A1X9x-_!1KuPCKI?DsKT80k3uuB-3C;MQ63llSNn|gq~)b1;T(3 zx4E7&C9`=By{tH8BoWN7(SN)aC4PDTaia3XfwgE)1oNAc3``_W29>NNd4vp^j4RQV zWc7(6{SWwU>|Z)j!VjD5ho37EicCD2Yb_&*#`rB>3-A{A2fui6*cw_aCcHNMMi{mlmT)@&6=;MxL%{v;rilh{0*y4Ow6U<^0nqT^YVDAs)!4)1 zA@P7Ip|P-N8gF1Mxevv&!@GF?<9K;LhyOUulC-gSE$LSJWYVDh=J}6(;_-(G%n99T zCiw89E|K*3bw>HqR7bh}SU$u2-D!XcBEIhULt;aG;@jLU8sG~_nt_cvVR_!+?7xuX zL|M$ibDsh&GVooa-MN!N`z_?)!|j$?nul*W6S{XWXd8tbd|4&~J3w>rXfiyM9Mu=? 
zNq|h~j><=Sl6*?)Pf(T6P3~(V0NU~*8uF+k(Vhgz{pIJLWrY`ea0dRkf5o*?sYpBukMC_Y%uQCV(P>b2%UlD!iSHq2(d-O$amje2{zdC>pcyo1Y)`uU1sHkt>k_(`ms z?$-K2@6F>?Rkz?rqMqP$RHM1lT&kb7JC`#s#xTof&1hL>c}YFnmo<95)`BO;4ER;T zOV`y975xZqS%(Kkudf~FD|)L^U50pMk(hN?i2={*4T2%M37d=z)V#UAWil++R$E;P zFKUBVC0Wd!Ojg%{F=SBY&8G+9bTb%>Yh$2R`RiLC*`9v~X6ml@LSXP#)&ZHngHo-7 zVn-I|nM{gxK&t+nZY+y zMAkcnor|(6dBrn%@@@D&Q-}TnVz+4A7%j&Q|;NDU9iWN2+ z)jIbsqpdcYaHpwZ=F~<*U1{bFwPnKn3lna|Z-pO1I>!YxWWQ?fd(nR0-rL!<08FZF z-D%x5GxCyc?fQ&8XYZZaFvCLQ{?5kZvVG-cw{O&H%+Ag}@gDp+A2TBWP~2E++Q1I~ zYuT4=u;j|*uWc9^xpeV||M(hf?!0scV61{v$wwmbmW#V{_#d0=pX&neiPBp zKQ{5JKo7nC)3F~2K<9t|f457(e`429jsQJ$#^An-=!GW=`xT&{yZ2Au0(y1o&X->h zfj)66_whX7%slz#!!ool|HH5UJG2ko^Nn-&d<6Pmd;6hRpnd7}SAI0{7{WXFH(xFQ zeMenfeiZu8JUI7X&>qQc_>NkF_PhVH@IA!$h40zF_)+M8;oOyfa_oEaOJ9E++WX)5 zaTRb6F+aWUV~2qM)8o(n7VuyC#LwP&9&oPyM)R8z$X$Kov#*|<0Y3R}zxD1Uv_EzE z+1~>{M!x^z$hcKNHu>4U04Lr^l?aLJrHQ*yYwMKjv2Rp6(y zDD|C)f{iKZJ|*Vg3jnmcw>=6!T$RY#(l8Jggt<4O1+K`>`BuV?7uL^J6}& zFU*Jgv43GaJdSZu%`E;2q-S>Vb@*I-6F$6jegn1t0)1RXA8)}2%DDNAz1P}mZG(lu z+8#~!O|Ygsa0T(mmSW}6x@A2(+rA7-PKp%Qm3qi}*xGCDv-Vq$Sd^8s4qCLuSggfa zyd_woC0QS_9;$|QFx`M^9TEDI)8xY{Ed5b z!lSo%bc;u~dh}M0Zu97E9^LNI+daC|qj!1qZjbKr=sh0&kVo(J=zSi&-=iP)=x&cb z;L)G+=+Ap}k4GQ$=tCZT*rR(ry3eEgJ^F}84|tUFXwIVtJxY6&@hIz2&ZE3X1&@jz zl{|_jVK tuple: + """ + Lookup IP location abstract method. + + Parameters + ---------- + ip_address : str, optional + a single address to look up (the default is None) + ip_addr_list : Iterable, optional + a collection of addresses to lookup (the default is None) + ip_entity : IpAddress, optional + an IpAddress entity (the default is None) - any existing + data in the Location property will be overwritten + + Returns + ------- + Tuple[List[Any], List[IpAddress]]: + raw geolocation results and same results as IpAddress entities with + populated Location property. 
+ + """ + if ip_address: + geo = _get_geo_loc() + ip_ent = IpAddress(Address=ip_address, Location=geo) + return [str(geo)], [ip_ent] + if ip_entity: + geo = _get_geo_loc() + ip_entity.Location = geo + return [str(geo)], [ip_entity] + + if ip_addr_list: + output_raw = [] + output_entities = [] + for addr in ip_addr_list: + raw, ents = self.lookup_ip(ip_address=addr) + output_raw.extend(raw) + output_entities.extend(ents) + return output_raw, output_entities + return [], [] + + +def _get_geo_loc(): + return GeoLocation( + CountryCode="US", + CountryName="United States", + State="WA", + City="Seattle", + Longitude=float(random.randint(-179, +179)), + Latitude=float(random.randint(-89, 89)), + Asn="My ASN", + ) + + +class TILookupMock: + """Test class for TILookup.""" + + def __init__(self, *args, **kwargs): + """Initialize mock class.""" + del args, kwargs + + def lookup_ioc(self, observable, ioc_type: str = None, **kwargs): + """Lookup fake TI.""" + del kwargs + result_list = [] + for i in range(3): + hit = random.randint(1, 10) > 5 + result_list.append( + ( + f"TIProv{i}", + LookupResult( + ioc=observable, + ioc_type=ioc_type, + safe_ioc=observable, + query_subtype="mock", + provider="mockTI", + result=True, + severity=2 if hit else 0, + details=f"Details for {observable}", + raw_result=f"Raw details for {observable}", + ), + ) + ) + return True, result_list + + def lookup_iocs(self, data, obs_col: str = None, **kwargs): + """Lookup fake TI.""" + del kwargs + if isinstance(data, dict): + for obs, ioc_type in data.items(): + _, item_result = self.lookup_ioc(observable=obs, ioc_type=ioc_type) + elif isinstance(data, pd.DataFrame): + for row in data.itertuples(): + _, item_result = self.lookup_ioc(observable=row[obs_col]) + elif isinstance(data, list): + for obs in data: + _, item_result = self.lookup_ioc(observable=obs) + results = [pd.Series(attr.asdict(ti_result)) for _, ti_result in item_result] + return 
pd.DataFrame(data=results).rename(columns=LookupResult.column_map()) + + @classmethod + def result_to_df(cls, ioc_lookup): + """Redirect to original method.""" + return TILookup.result_to_df(ioc_lookup)