diff --git a/conda/conda-reqs.txt b/conda/conda-reqs.txt index 196910baa..835b7c166 100644 --- a/conda/conda-reqs.txt +++ b/conda/conda-reqs.txt @@ -4,12 +4,12 @@ azure-core>=1.24.0 azure-mgmt-core>=1.2.1 azure-identity>=1.10.0 azure-keyvault-secrets>=4.0.0 -azure-kusto-data>=4.0.0 +azure-kusto-data>=4.0.0, <=5.0.0 azure-mgmt-compute>=4.6.2 azure-mgmt-keyvault>=2.0.0 azure-mgmt-network>=2.7.0 azure-mgmt-resource>=16.1.0 -azure-monitor-query>=1.0.0 +azure-monitor-query>=1.0.0, <=2.0.0 azure-storage-blob>=12.5.0 beautifulsoup4>=4.0.0 bokeh>=1.4.0, <4.0.0 @@ -19,7 +19,7 @@ dnspython>=2.0.0, <3.0.0 folium>=0.9.0 geoip2>=2.9.0 html5lib -httpx==0.24.1 +httpx>=0.23.0, <1.0.0 ipython>=7.23.1 ipywidgets>=7.4.2, <9.0.0 keyring>=13.2.1 diff --git a/docs/source/DataAcquisition.rst b/docs/source/DataAcquisition.rst index d46944771..c0580529b 100644 --- a/docs/source/DataAcquisition.rst +++ b/docs/source/DataAcquisition.rst @@ -17,7 +17,6 @@ Individual Data Environments :maxdepth: 2 data_acquisition/DataProv-MSSentinel - data_acquisition/DataProv-MSSentinel-New data_acquisition/DataProv-MSDefender data_acquisition/DataProv-MSGraph data_acquisition/DataProv-LocalData @@ -26,10 +25,11 @@ Individual Data Environments data_acquisition/MordorData data_acquisition/DataProv-Sumologic data_acquisition/DataProv-Kusto - data_acquisition/DataProv-Kusto-New data_acquisition/DataProv-Cybereason data_acquisition/DataProv-OSQuery data_acquisition/DataProv-Velociraptor + data_acquisition/DataProv-MSSentinel-Legacy + data_acquisition/DataProv-Kusto-Legacy Built-in Data Queries diff --git a/docs/source/data_acquisition/DataProv-Kusto-Legacy.rst b/docs/source/data_acquisition/DataProv-Kusto-Legacy.rst new file mode 100644 index 000000000..597e53c15 --- /dev/null +++ b/docs/source/data_acquisition/DataProv-Kusto-Legacy.rst @@ -0,0 +1,302 @@ +Azure Data Explorer/Kusto Provider - Legacy Version +=================================================== + +Kusto Configuration +------------------- + +Kusto 
Configuration in MSTICPy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can store your connection details in *msticpyconfig.yaml*. + +For more information on using and configuring *msticpyconfig.yaml* see +:doc:`msticpy Package Configuration <../getting_started/msticpyconfig>` +and :doc:`MSTICPy Settings Editor<../getting_started/SettingsEditor>` + +The settings in the file should look like the following two examples: + +.. code:: yaml + + DataProviders: + ... + Kusto: + Args: + Cluster: https://mstic.kusto.windows.net + IntegratedAuth: True + +.. code:: yaml + + DataProviders: + ... + Kusto: + Args: + Cluster: https://msticapp.kusto.windows.net + ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 + TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 + ClientSecret: "[PLACEHOLDER]" + + +We strongly recommend storing the client secret value +in Azure Key Vault. You can replace the text value with a reference +to a Key Vault secret using the MSTICPy configuration editor. + +Your configuration when using Key Vault should look like the following: + +.. code:: yaml + + Kusto: + Args: + Cluster: https://msticapp.kusto.windows.net + ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 + TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 + ClientSecret: + KeyVault: + +You can create multiple instances of the Kusto settings for +multiple clusters by adding +an instance string to the "Kusto" section names + +.. code:: yaml + + DataProviders: + ... + Kusto-mstic: + Args: + Cluster: https://mstic.kusto.windows.net + IntegratedAuth: True + Kusto-mstic2: + Args: + Cluster: https://mstic2.kusto.windows.net + IntegratedAuth: True + Kusto-msticapp: + Args: + Cluster: https://msticapp.kusto.windows.net + ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 + TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 + ClientSecret: + KeyVault: + + +Data Query Format for Kusto clusters +------------------------------------ + +The query template format for Kusto queries should look like +the following. + +.. 
code:: yaml + + metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [DeviceEvents.hostdata] + cluster: https://msticapp.kusto.windows.net + database: hostdata + tags: ["user"] + defaults: + parameters: + table: + description: Table name + type: str + default: "DeviceProcessEvents" + start: + description: Query start time + type: datetime + default: -30 + end: + description: Query end time + type: datetime + default: 0 + add_query_items: + description: Additional query clauses + type: str + default: "" + sources: + list_host_processes: + description: Lists all process creations for a host + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where DeviceName has "{host_name}" + {add_query_items}' + uri: None + parameters: + host_name: + description: Name of host + type: str + +Most of the query file is identical to queries for other drivers. +However, the metadata section has additional items: ``cluster`` and +``database``. + +.. code-block:: yaml + :emphasize-lines: 4, 5, 6 + + metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [ALIAS[.DATABASE]] + cluster: KUSTO_CLUSTER_URI + database: DATABASE + + +The ``data_environments`` item must include "Kusto" in the list of +applicable environments. + +You can specify the Kusto database to use in one of two ways: + +1. Use the ``database`` key. + Add the name of the database to connect to. The ``data_families`` key + is used as a container name when adding attributes. Whatever string + you specify here will be added as a prefix to the query name before attaching + the query to the query provider. + +2. Encode the database in the ``data_families`` item. 
If you do not + specify a database key explicitly, you should use a dot-separated string + for the data_families item: + + - the first part (before the dot) is an alias that will be used as a prefix + when the queries are added to the query provider. + - the second part is the Kusto database containing the data to be queried. + +The ``cluster`` item in the query template file must match the ``Cluster`` +setting in the *msticpyconfig* setting described in the previous section. + +Here are two examples. + +.. code-block:: yaml + + metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [DeviceEvents] + database: hostdata + cluster: https://msticapp.kusto.windows.net + +.. code-block:: yaml + + metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [DeviceEvents.hostdata] + cluster: https://msticapp.kusto.windows.net + +Queries using either of these metadata sections would be accessed and run as follows: + +.. code:: ipython3 + + kql_prov.DeviceEvents.list_host_processes(host_name="my_host", ...) + +The file-level ``metadata`` section applies to all queries in the file by +default. You can specify a metadata section for individual queries. Any +settings here will override the file-level settings. + +The example below shows overriding the ``data_families`` and ``cluster`` +entries for an individual query. + +.. code:: yaml + + metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [DeviceEvents.hostdata] + cluster: https://msticapp.kusto.windows.net + tags: ["user"] + defaults: + parameters: + table: + description: Table name + type: str + default: "DeviceProcessEvents" + # ... 
+ sources: + list_host_processes: + description: Lists all process creations for a host + metadata: + data_families: [DeviceEvents.scrubbeddata] + cluster: https://msticapp.kusto.windows.net + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where DeviceName has "{host_name}" + {add_query_items}' + uri: None + parameters: + host_name: + description: Name of host + type: str + +Loading a QueryProvider for Kusto +--------------------------------- + +.. code:: ipython3 + + kql_prov = QueryProvider("Kusto") + + + +Connecting to a Kusto cluster +----------------------------- + +If you are using query files (as described above) you do not need to explicitly +connect - the connection will be made dynamically using the parameters in the +query definition. + +To run ad hoc queries, however, you need to explicitly connect to a cluster and +database. The parameters required for connection to a Kusto cluster can be passed in +a number of ways. You can provide a full connection string or parameters +for ``cluster`` and ``database``. In the latter case, you must have configured +settings for the cluster defined in your msticpyconfig.yaml. + +The ``cluster`` name can be either the actual cluster name or the alias +that you used in your settings (i.e. the ``INSTANCE`` value in ``Kusto-INSTANCE`` +configuration key). To connect, you must also specify a valid database +name in the cluster. + + +.. code:: ipython3 + + kql_prov.connect(cluster="msticapp", database="hostdata") + + +If you have queries defined (in template files) for multiple clusters +and databases, you do not need to connect explicitly to each one. +You can call these queries by name - the driver will dynamically +read the connection parameters from the query file and attempt +to authenticate to the cluster. 
+ +Additional Kusto query parameters +--------------------------------- + +You can override the cluster and database for an individual +query by supplying the ``cluster`` and/or ``database`` parameters +as query parameters. + + +.. code:: ipython3 + + kql_prov.DeviceEvents.list_host_processes( + host_name="my_host", + cluster="https://somecluster.kusto.windows.net", + database="archive" + ... + ) + + +Other Kusto Documentation +----------------------------------- + +For examples of using the Kusto provider, see the samples +`Kusto Analysis Notebook` +and `Kusto Ingest Notebook` + +:py:mod:`Kusto driver API documentation` \ No newline at end of file diff --git a/docs/source/data_acquisition/DataProv-Kusto-New.rst b/docs/source/data_acquisition/DataProv-Kusto-New.rst deleted file mode 100644 index 33fdddf62..000000000 --- a/docs/source/data_acquisition/DataProv-Kusto-New.rst +++ /dev/null @@ -1,498 +0,0 @@ -Azure Data Explorer/Kusto Provider - New Implementation -======================================================= - -This is a new implementation of the Azure Data Explorer/Kusto -QueryProvider using the -`azure-kusto-data SDK `__ -(the earlier implementation used -`Kqlmagic `__). - - -.. warning:: This provider currently in beta and is available for testing. - It is available alongside the existing Kusto provider for you - to compare old and new. To use it you will need the ``azure-kusto-data`` - package installed. You can install this with ``pip install azure-kusto-data`` - or ``pip install msticpy[azure_query]``. - If you are using the existing implementation, see :doc:`./DataProv-Kusto` - -Changes from the previous implementation ----------------------------------------- - -* Use the provider name ``Kusto_New`` when creating a QueryProvider - instance. This will be changed to ``Kusto`` in a future release. -* The driver supports asynchronous execution of queries. This is used - when you create a Query provider with multiple connections (e.g. 
- to different clusters) and when you split queries into time chunks. - See :ref:`multiple_connections` and :ref:`splitting_query_execution` for - for more details. -* The settings format has changed (although the existing format - is still supported albeit with some limited functionality). -* Supports user-specified timeout for queries. -* Supports proxies (via MSTICPy config or the ``proxies`` parameter to - the ``connect`` method) -* You could previously specify a new cluster to connect to in - when executing a query. This is no longer supported. Once the - provider is connected to a cluster it will only execute queries against - that cluster. (You can however, call the ``connect()`` function to connect - the provider to a new cluster before running the query.) -* Some of the previous parameters have been deprecated: - - * The ``mp_az_auth`` parameter is replaced by ``auth_types`` (the former still works - but will be removed in a future release). - * ``mp_az_auth_tenant_id`` is replaced by ``tenant_id`` (the former - is no longer supported). - -Kusto Configuration -------------------- - -Kusto Configuration in MSTICPy -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can store your connection details in *msticpyconfig.yaml*. - -For more information on using and configuring *msticpyconfig.yaml* see -:doc:`msticpy Package Configuration <../getting_started/msticpyconfig>` -and :doc:`MSTICPy Settings Editor<../getting_started/SettingsEditor>` - -.. note:: The settings for the new Kusto provider are stored in the - ``KustoClusters`` section of the configuration file. This cannot - currently be edited from the MSTICPy Settings Editor - please - edit the *msticpyconfig.yaml* in a text editor to change these. - -To accommodate the use of multiple clusters, the new provider supports -a different configuration format. - -The basic settings in the file should look like the following: - -.. code:: yaml - - KustoClusters: - ... 
- Cluster1: - Args: - Cluster: https://uscluster.kusto.windows.net - Cluster2: - Args: - Cluster: https://eucluster.kusto.windows.net - IntegratedAuth: True # This is default and is optional - -You can have any number of cluster entries in this section. - -Specifying additional parameters for a cluster -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can add authentication and other parameters to the ``Args`` -sub-key of a cluster definition. In the following example, -the TenantId is specified along with Client app ID and client secret -for *clientsecret* authentication. - -.. code:: yaml - - KustoClusters: - DataClusterX: - Args: - Cluster: https://xxx.kusto.windows.net - ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 - TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 - ClientSecret: - KeyVault: - -The ClusterDefaults section -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you have parameters that you want to apply to all clusters, -you can add these to a ``ClusterDefaults`` section. - -.. code:: yaml - - KustoClusters: - ClusterDefaults: - Args: - TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 - Cluster1: - Args: - Cluster: https://uscluster.kusto.windows.net - Cluster2: - Args: - Cluster: https://eucluster.kusto.windows.net - - -Creating ClusterGroups -~~~~~~~~~~~~~~~~~~~~~~ - -You can create a group of clusters that you can reference by -cluster group name. This is useful if you have clusters in different regions -that share the same schema and you want to run the same queries -against all of them. - -ClusterGroups are used primarily to support query templates, to match -queries to the correct cluster. See `Writing query templates for Kusto clusters`_ -later in this document. - -Loading a QueryProvider for Kusto ---------------------------------- - -.. 
code:: ipython3 - - import msticpy as mp - kql_prov = mp.QueryProvider("Kusto_New") - - - -Connecting to a Kusto cluster ------------------------------ - -Before running queries you need to connect to a cluster using -the ``connect()`` method. - -See -:py:meth:`connect() ` - -The parameters required for connection to a Kusto cluster can be passed -to ``connect()`` in -several of ways. You can provide a full connection string or parameters -for ``cluster`` (and optionally, ``database``). -In the latter case, you must have configured -settings for the cluster defined in your msticpyconfig.yaml. - -If you have the cluster details configured in msticpy, the ``cluster`` -parameter can be one of the following: - -* The section name ("Cluster1" or "Cluster2" in the configuration example above) -* The full URL of the cluster either the actual cluster name -* The host name of the cluster (e.g. "uscluster", "eucluster" in the example) - -In all cases these are case-insensitive. - -These are all equivalent: - -.. code:: ipython3 - - kql_prov.connect(cluster="Cluster2") - kql_prov.connect(cluster="eucluster") - kql_prov.connect(cluster="https://eucluster.kusto.windows.net") - - -If the cluster is not in your configuration you must use the full -URL of the cluster. - -You can optionally specify a default database to connect to. The database -can be changed with each query (either by specifying a ``database`` parameter -or by using the ``database`` metadata property in a query definition file -(see `Writing query templates for Kusto clusters`_) below) - -You can also pass authentication parameters in the ``connect`` call: - -* auth_types - to override the configured Azure credential types -* tenant_id - to override your default tenant_id - -.. 
code:: python3 - - kql_prov.connect( - cluster="Cluster2", - auth_types=["device_code"], - tenant_id="69d28fd7-42a5-48bc-a619-af56397b9f28" - ) - -For more details on Azure Authentication in *MSTICPy* see -:doc:`Azure Authentication <../getting_started/AzureAuthentication>` - -Kusto QueryProvider methods and properties ------------------------------------------- - -The Kusto QueryProvider has the following methods and properties -in addition to those inherited from the base QueryProvider class. - -* :py:meth:`get_database_names() ` - Returns the names of the databases for a connected cluster. -* :py:meth:`get_database_schema([database]) ` - Returns a schema dictionary for the tables in a database a connected cluster. -* :py:meth:`configured_clusters (property) ` -* Returns a list of the configured cluster read from msticpyconfig.yaml. -* :py:meth:`cluster_uri (property) ` - The URI of the connected cluster. -* :py:meth:`cluster_name (property) ` - The host name of the connected cluster. -* :py:meth:`cluster_config_name (property) ` - The configuration entry name for the connected cluster. -* :py:meth:`set_cluster(cluster) ` - Switch the provider to a different cluster - this is a more restricted version of the ``connect()`` method. -* :py:meth:`set_database(database) ` - Switches the default database for the provider. - -Running Ad Hoc queries ----------------------- - -You can run ad hoc queries using the ``exec_query()`` method of the QueryProvider. - -.. note:: You usually need to specify a ``database`` parameter when running - ad hoc queries. - -Writing query templates for Kusto clusters ------------------------------------------- - -The details for configuring and connecting to Kusto clusters -are enough to allow you to run ad hoc queries. However, if you want to -create and use parameterized queries there are some additional steps -that you need to take. 
- -Please read the general section on -:doc:`Creating new queries <../extending/Queries>` -if you are not familiar with the general process of creating query -templates for *MSTICPy*. - -The queries for Kusto work in the same way as for many other data providers -except that you can (and should) specify the cluster(s) and database for -the query to use. - -Controlling which queries are displayed and runnable for a provider -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Since Kusto clusters have widely varying schemas, it only makes sense -to run a query on a cluster for which it was designed. -MSTICPy enforces this by allowing you to specify parameters in -both the query template definitions and the cluster configuration -in ``msticpyconfig.yaml`` that correctly match queries to -providers connected to appropriate clusters. - -When you first instantiate a Kusto QueryProvider, it will read -all queries files available for the Kusto DataEnvironment. However, -when you connect to a cluster, these queries and filtered so that -only ones compatible with this cluster are available. - -If you have query definition files (query templates) you can -try this by creating a Kusto QueryProvider and running the -``list_queries()`` method. Then connect to a cluster and run -``list_queries()`` again. In the first case, you should see all -queries that you have defined but in the second case, you -should only see queries that have been built to run on that -cluster. - -.. code:: python3 - - from msticpy.data import QueryProvider - kql_prov = QueryProvider("Kusto") - kql_prov.list_queries() - -.. code:: python3 - - # new cell - kql_prov.connect(cluster="Cluster2") - kql_prov.list_queries() - -This is explained more in the later sections on `Kusto cluster specifier`_ -and - -Basic Kusto query format -~~~~~~~~~~~~~~~~~~~~~~~~ - -The query template format for Kusto queries should look like -the following. 
The ``data_environments`` item must include -"Kusto" in the list of applicable environments. - -This example show the metadata section for a query file, highlighting -the items that are specific Kusto queries. (``data_families`` is common -to other query types but has some Kusto-specific usage that is different -as explained later.) - -.. code-block:: - :emphasize-lines: 4-12 - - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [DeviceEvents.hostdata] - cluster: https://uscluster.kusto.windows.net - clusters: - - https://uscluster.kusto.windows.net - - https://eucluster.kusto.windows.net - cluster_groups: - - Group1 - database: hostdata - tags: ["user"] - defaults: - parameters: - table: - # .... - sources: - list_host_processes: - description: Lists all process creations for a host - # .... - - -Most of the query file is identical to queries for other drivers. -However, the metadata section has some additional items. These -are explained in the following sections. - -Kusto database specifier -~~~~~~~~~~~~~~~~~~~~~~~~ - -You can use the ``database`` item to specify the cluster database to -use. For backward compatibility you can also specify this in the -``data_families`` entry using a dotted notation. ``data_families`` -is also used to group queries in the query provider, so using this -to specify the database name is not recommended. - -The following examples show the different ways of configuring -this. - -For the following two configurations, the database used is ``DeviceEvents`` -and the queries are grouped under the ``hostdata`` family (the -queries are attached as methods to the QueryProvider). - -.. code-block:: yaml - :emphasize-lines: 5,6 - - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [hostdata] - database: DeviceEvents - cluster: https://uscluster.kusto.windows.net - -.. 
code-block:: yaml - :emphasize-lines: 5,6 - - # Deprecated format - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [hostdata.DeviceEvents] - cluster: https://uscluster.kusto.windows.net - -For this configuration the database used is ``DeviceEvents`` and the -queries will also be grouped under the DeviceEvents container. - -.. code-block:: yaml - :emphasize-lines: 5 - - # Deprecated format - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [DeviceEvents] - cluster: https://uscluster.kusto.windows.net - -.. note:: The when using the ``data_families`` entry to specify - the database name, only the first entry in the list is used - for this. Subsequent items still work for creating - data query groupings. - -Kusto cluster specifier -~~~~~~~~~~~~~~~~~~~~~~~ - -Adding a cluster specifier matches queries to the right cluster -and prevents a query from being used with -a cluster and database for which it was not intended. - -You can specify the cluster to use in three ways: - -* Including a ``cluster_groups`` item in the metadata section. - This is a list of cluster group names that are defined in the - ``msticpyconfig.yaml`` file. Queries with one or more ``cluster_groups`` - entries can be used against any of the cluster definitions in - ``msticpyconfig.yaml`` that have matching cluster group names. -* Including a ``clusters`` item in the metadata section. - This is a list of cluster identifiers (URIs, names or configuration section names - that are defined in the ``msticpyconfig.yaml`` file). These queries - can be used with any cluster configuration entry that matches one - of the IDs in the ``clusters`` item. -* Including a ``cluster`` item in the metadata section. - This is a single cluster identifier (URI, name or configuration section name - that is defined in the ``msticpyconfig.yaml`` file). 
These queries - can only be used with the cluster configuration entry that matches - the ID in the ``cluster`` item. - -The cluster specifiers are used in the order above until a match is found. -You can include more than one cluster specifier in a query definition file. -If no match is found, the query will not be added to the query provider. - -.. note:: For queries that have no cluster specifier, they will - be added to the query provider but but may not work. - -.. tip:: If you want to avoid these queries being added use - the parameter ``strict_query_match=True`` when - creating the Kusto QueryProvider as shown in the following - example - -.. code:: python3 - - import msticpy as mp - kql_prov = mp.QueryProvider("Kusto_New", strict_query_match=True) - - -The following examples show the different ways of configuring -clusters to match queries: - - -.. code-block:: yaml - :emphasize-lines: 6,7 - - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [hostdata] - cluster_groups: - - Group1 - database: DeviceEvents - -.. code-block:: yaml - :emphasize-lines: 6,7 - - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [hostdata] - clusters: - - https://uscluster.kusto.windows.net - - https://eucluster.kusto.windows.net - database: DeviceEvents - -.. code-block:: yaml - :emphasize-lines: 6 - - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [hostdata] - cluster: https://uscluster.kusto.windows.net - database: DeviceEvents - -.. note:: you can also use cluster specifiers (using the same syntax - as show above) for individual query metadata. Each query has - it's own optional ``metadata`` sub-key. 
Setting cluster - specifiers at the query level, with different queries assigned - to different clusters in the same file may make organizing - your queries more difficult, so we recommend only using - cluster specifiers at the file level. However, it is possible - to do this if you need to. - - -Logical flow used to determine if a query is shown -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This flowchart shows the logic applied using the query definition -and configuration parameters to determine whether a query is -shown or not (i.e. whether it appears in ``list_queries()`` and -as attached to the QueryProvider as a query function.) - -.. figure:: _static/kusto_query_display.png - :alt: Flow chart showing how queries are filtered based on query metadata - and configuration settings. - :height: 5in - -Other Kusto Documentation ------------------------------------ - -For examples of using the Kusto provider, see the samples -`Kusto Analysis Notebook `__ -and `Kusto Ingest Notebook `__ - -:py:mod:`Kusto driver API documentation` diff --git a/docs/source/data_acquisition/DataProv-Kusto.rst b/docs/source/data_acquisition/DataProv-Kusto.rst index c700180df..4d988b591 100644 --- a/docs/source/data_acquisition/DataProv-Kusto.rst +++ b/docs/source/data_acquisition/DataProv-Kusto.rst @@ -1,6 +1,48 @@ Azure Data Explorer/Kusto Provider ================================== +The Azure Data Explorer/Kusto +QueryProvider uses the +`azure-kusto-data SDK `__ +to connect to Azure Data Explorer clusters and provide +query capability. + + + +.. warning:: This provider replaces an earlier implementation, + which used KqlMagic as the underlying data connector. + The previous driver is still available but to use it you must + specify ``Kusto_Legacy`` as the provider name when creating + the QueryProvider instance. 
+ + For more information about the previous driver see + :doc:`./DataProv-Kusto-Legacy` + +Changes from the previous implementation +---------------------------------------- + +* The driver supports asynchronous execution of queries. This is used + when you create a Query provider with multiple connections (e.g. + to different clusters) and when you split queries into time chunks. + See :ref:`multiple_connections` and :ref:`splitting_query_execution` for + more details. +* The settings format has changed (although the existing format + is still supported albeit with some limited functionality). +* Supports user-specified timeout for queries. +* Supports proxies (via MSTICPy config or the ``proxies`` parameter to + the ``connect`` method) +* You could previously specify a new cluster to connect to + when executing a query. This is no longer supported. Once the + provider is connected to a cluster it will only execute queries against + that cluster. (You can, however, call the ``connect()`` function to connect + the provider to a new cluster before running the query.) +* Some of the previous parameters have been deprecated: + + * The ``mp_az_auth`` parameter is replaced by ``auth_types`` (the former still works + but will be removed in a future release). + * ``mp_az_auth_tenant_id`` is replaced by ``tenant_id`` (the former + is no longer supported). + Kusto Configuration ------------------- @@ -13,290 +55,476 @@ For more information on using and configuring *msticpyconfig.yaml* see :doc:`msticpy Package Configuration <../getting_started/msticpyconfig>` and :doc:`MSTICPy Settings Editor<../getting_started/SettingsEditor>` -The settings in the file should look like the following two examples: +.. note:: The settings for the new Kusto provider are stored in the + ``KustoClusters`` section of the configuration file. This cannot + currently be edited from the MSTICPy Settings Editor - please + edit the *msticpyconfig.yaml* in a text editor to change these. -.. 
code:: yaml +To accommodate the use of multiple clusters, the new provider supports +a different configuration format. - DataProviders: - ... - Kusto: - Args: - Cluster: https://mstic.kusto.windows.net - IntegratedAuth: True +The basic settings in the file should look like the following: .. code:: yaml - DataProviders: + KustoClusters: ... - Kusto: + Cluster1: Args: - Cluster: https://msticapp.kusto.windows.net - ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 - TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 - ClientSecret: "[PLACEHOLDER]" + Cluster: https://uscluster.kusto.windows.net + Cluster2: + Args: + Cluster: https://eucluster.kusto.windows.net + IntegratedAuth: True # This is default and is optional +You can have any number of cluster entries in this section. -We strongly recommend storing the client secret value -in Azure Key Vault. You can replace the text value with a referenced -to a Key Vault secret using the MSTICPy configuration editor. +Specifying additional parameters for a cluster +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Your configuration when using Key Vault should look like the following: +You can add authentication and other parameters to the ``Args`` +sub-key of a cluster definition. In the following example, +the TenantId is specified along with Client app ID and client secret +for *clientsecret* authentication. .. 
code:: yaml - Kusto: - Args: - Cluster: https://msticapp.kusto.windows.net - ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 - TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 - ClientSecret: - KeyVault: + KustoClusters: + DataClusterX: + Args: + Cluster: https://xxx.kusto.windows.net + ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 + TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 + ClientSecret: + KeyVault: + +The ClusterDefaults section +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You can create multiple instances of the Kusto settings for -multiple clusters by adding -an instance string to the "Kusto" section names +If you have parameters that you want to apply to all clusters, +you can add these to a ``ClusterDefaults`` section. .. code:: yaml - DataProviders: - ... - Kusto-mstic: + KustoClusters: + ClusterDefaults: Args: - Cluster: https://mstic.kusto.windows.net - IntegratedAuth: True - Kusto-mstic2: + TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 + Cluster1: Args: - Cluster: https://mstic2.kusto.windows.net - IntegratedAuth: True - Kusto-msticapp: + Cluster: https://uscluster.kusto.windows.net + Cluster2: Args: - Cluster: https://msticapp.kusto.windows.net - ClientId: 69d28fd7-42a5-48bc-a619-af56397b1111 - TenantId: 69d28fd7-42a5-48bc-a619-af56397b9f28 - ClientSecret: - KeyVault: + Cluster: https://eucluster.kusto.windows.net + + +Creating ClusterGroups +~~~~~~~~~~~~~~~~~~~~~~ + +You can create a group of clusters that you can reference by +cluster group name. This is useful if you have clusters in different regions +that share the same schema and you want to run the same queries +against all of them. + +ClusterGroups are used primarily to support query templates, to match +queries to the correct cluster. See `Writing query templates for Kusto clusters`_ +later in this document. + +Loading a QueryProvider for Kusto +--------------------------------- + +.. 
code:: ipython3 + + import msticpy as mp + kql_prov = mp.QueryProvider("Kusto") + +Optional parameters +~~~~~~~~~~~~~~~~~~~ + +**timeout**: Query timeout in seconds, default is 240 seconds (4 minutes) +Maximum is 3600 seconds (1 hour). This can also be set in the +``connect`` call (see below) and overridden in query methods. +**proxies**: Proxy settings for Kusto queries. +Dictionary format is {protocol: proxy_url} +Where protocol is https, http, etc. and proxy_url can contain +optional authentication information in the format +"https://username:password@proxy_host:port" +If you have a proxy configuration in msticpyconfig.yaml and +you do not want to use it, set this to an empty dictionary. +This can be overridden in ``connect`` call (see below). -Data Query Format for Kusto clusters ------------------------------------- +.. note:: Proxy settings can also be configured globally in + *msticpyconfig.yaml* in the ``Proxies`` key of the ``msticpy`` + section. This will be used automatically if set unless you + override it in the ``proxies`` parameter in the + ``connect`` call. Set to an empty dictionary to disable + global proxy settings. + +Connecting to a Kusto cluster +----------------------------- + +Before running queries you need to connect to a cluster using +the ``connect()`` method. + +See +:py:meth:`connect() ` + +The parameters required for connection to a Kusto cluster can be passed +to ``connect()`` in +several ways. You can provide a full connection string or parameters +for ``cluster`` (and optionally, ``database``). +In the latter case, you must have configured +settings for the cluster defined in your msticpyconfig.yaml. + +If you have the cluster details configured in msticpy, the ``cluster`` +parameter can be one of the following: + +* The section name ("Cluster1" or "Cluster2" in the configuration example above) +* The full URL of the cluster +* The host name of the cluster (e.g. 
"uscluster", "eucluster" in the example) + +In all cases these are case-insensitive. + +These are all equivalent: + +.. code:: ipython3 + + kql_prov.connect(cluster="Cluster2") + kql_prov.connect(cluster="eucluster") + kql_prov.connect(cluster="https://eucluster.kusto.windows.net") + + +If the cluster is not in your configuration you must use the full +URL of the cluster. + +You can optionally specify a default database to connect to. The database +can be changed with each query (either by specifying a ``database`` parameter +or by using the ``database`` metadata property in a query definition file +(see `Writing query templates for Kusto clusters`_ below)). + +You can also pass authentication parameters in the ``connect`` call: + +* auth_types - to override the configured Azure credential types +* tenant_id - to override your default tenant_id + +.. code:: python3 + + kql_prov.connect( + cluster="Cluster2", + auth_types=["device_code"], + tenant_id="69d28fd7-42a5-48bc-a619-af56397b9f28" + ) + +For more details on Azure Authentication in *MSTICPy* see +:doc:`Azure Authentication <../getting_started/AzureAuthentication>` + +Other parameters +~~~~~~~~~~~~~~~~ + +**timeout**: Query timeout in seconds, default is 240 seconds (4 minutes) +Maximum is 3600 seconds (1 hour). This can also be set in the +``connect`` call (see below) and overridden in query methods. +**connection_str**: Provide a full connection string, including authentication +credentials. This can be used instead of the ``cluster`` parameter. + + +Kusto QueryProvider methods and properties +------------------------------------------ + +The Kusto QueryProvider has the following methods and properties +in addition to those inherited from the base QueryProvider class. + +* :py:meth:`get_database_names() ` + Returns the names of the databases for a connected cluster. +* :py:meth:`get_database_schema([database]) ` + Returns a schema dictionary for the tables in a database of a connected cluster. 
+* :py:meth:`configured_clusters (property) ` + Returns a list of the configured clusters read from msticpyconfig.yaml. +* :py:meth:`cluster_uri (property) ` + The URI of the connected cluster. +* :py:meth:`cluster_name (property) ` + The host name of the connected cluster. +* :py:meth:`cluster_config_name (property) ` + The configuration entry name for the connected cluster. +* :py:meth:`set_cluster(cluster) ` + Switch the provider to a different cluster - this is a more restricted version of the ``connect()`` method. +* :py:meth:`set_database(database) ` + Switches the default database for the provider. + +Running Ad Hoc queries +---------------------- + +You can run ad hoc queries using the ``exec_query()`` method of the QueryProvider. + +.. note:: You usually need to specify a ``database`` parameter when running + ad hoc queries. + +Writing query templates for Kusto clusters +------------------------------------------ + +The details for configuring and connecting to Kusto clusters +are enough to allow you to run ad hoc queries. However, if you want to +create and use parameterized queries there are some additional steps +that you need to take. + +Please read the general section on +:doc:`Creating new queries <../extending/Queries>` +if you are not familiar with the general process of creating query +templates for *MSTICPy*. + +The queries for Kusto work in the same way as for many other data providers +except that you can (and should) specify the cluster(s) and database for +the query to use. + +Controlling which queries are displayed and runnable for a provider +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since Kusto clusters have widely varying schemas, it only makes sense +to run a query on a cluster for which it was designed. 
+MSTICPy enforces this by allowing you to specify parameters in +both the query template definitions and the cluster configuration +in ``msticpyconfig.yaml`` that correctly match queries to +providers connected to appropriate clusters. + +When you first instantiate a Kusto QueryProvider, it will read +all query files available for the Kusto DataEnvironment. However, +when you connect to a cluster, these queries are filtered so that +only ones compatible with this cluster are available. + +If you have query definition files (query templates) you can +try this by creating a Kusto QueryProvider and running the +``list_queries()`` method. Then connect to a cluster and run +``list_queries()`` again. In the first case, you should see all +queries that you have defined but in the second case, you +should only see queries that have been built to run on that +cluster. + +.. code:: python3 + + from msticpy.data import QueryProvider + kql_prov = QueryProvider("Kusto") + kql_prov.list_queries() + +.. code:: python3 + + # new cell + kql_prov.connect(cluster="Cluster2") + kql_prov.list_queries() + +This is explained more in the later sections on `Kusto cluster specifier`_ +and `Logical flow used to determine if a query is shown`_. + +Basic Kusto query format +~~~~~~~~~~~~~~~~~~~~~~~~ The query template format for Kusto queries should look like -the following. +the following. The ``data_environments`` item must include +"Kusto" in the list of applicable environments. -.. code:: yaml +This example shows the metadata section for a query file, highlighting +the items that are specific to Kusto queries. (``data_families`` is common +to other query types but has some Kusto-specific usage that is different +as explained later.) + +.. 
code-block:: + :emphasize-lines: 4-12 metadata: version: 1 description: Kusto Queries data_environments: [Kusto] data_families: [DeviceEvents.hostdata] - cluster: https://msticapp.kusto.windows.net + cluster: https://uscluster.kusto.windows.net + clusters: + - https://uscluster.kusto.windows.net + - https://eucluster.kusto.windows.net + cluster_groups: + - Group1 database: hostdata tags: ["user"] defaults: parameters: table: - description: Table name - type: str - default: "DeviceProcessEvents" - start: - description: Query start time - type: datetime - default: -30 - end: - description: Query end time - type: datetime - default: 0 - add_query_items: - description: Additional query clauses - type: str - default: "" + # .... sources: list_host_processes: - description: Lists all process creations for a host - metadata: - args: - query: ' - {table} - | where Timestamp >= datetime({start}) - | where Timestamp <= datetime({end}) - | where DeviceName has "{host_name}" - {add_query_items}' - uri: None - parameters: - host_name: - description: Name of host - type: str + description: Lists all process creations for a host + # .... -Most of the query file is identical to queries for other drivers. -However, the metadata section has additional items: ``cluster`` and -``database``. - -.. code-block:: yaml - :emphasize-lines: 4, 5, 6 - - metadata: - version: 1 - description: Kusto Queries - data_environments: [Kusto] - data_families: [ALIAS[.DATABASE]] - cluster: KUSTO_CLUSTER_URI - database: DATABASE +Most of the query file is identical to queries for other drivers. +However, the metadata section has some additional items. These +are explained in the following sections. -The ``data_environments`` item must include "Kusto" in the list of -applicable environments. - -You can specify the Kusto database to use in one of two ways: - -1. Use the ``database`` key. - Add the name of the database to connect to. 
The ``data_families`` key - is used as a container name when adding attributes. Whatever string - you specify here will be added as a prefix to the query name before attaching - the query to the query provider. - -2. Encode the database in the ``data_families`` item. If you do not - specify a database key explicitly, you should use a dot-separated string - for the data_families item: +Kusto database specifier +~~~~~~~~~~~~~~~~~~~~~~~~ - - the first part (before the dot) is an alias that will be used as a prefix - when the queries are added to the query provider. - - the second part is the Kusto database containing the data to be queried. +You can use the ``database`` item to specify the cluster database to +use. For backward compatibility you can also specify this in the +``data_families`` entry using a dotted notation. ``data_families`` +is also used to group queries in the query provider, so using this +to specify the database name is not recommended. -The ``cluster`` item in the query template file must match the ``Cluster`` -setting in the *msticpyconfig* setting described in the previous section. +The following examples show the different ways of configuring +this. -Here is are two examples. +For the following two configurations, the database used is ``DeviceEvents`` +and the queries are grouped under the ``hostdata`` family (the +queries are attached as methods to the QueryProvider). .. code-block:: yaml + :emphasize-lines: 5,6 metadata: version: 1 description: Kusto Queries data_environments: [Kusto] - data_families: [DeviceEvents] - database: hostdata - cluster: https://msticapp.kusto.windows.net + data_families: [hostdata] + database: DeviceEvents + cluster: https://uscluster.kusto.windows.net .. 
code-block:: yaml + :emphasize-lines: 5,6 + # Deprecated format metadata: version: 1 description: Kusto Queries data_environments: [Kusto] - data_families: [DeviceEvents.hostdata] - cluster: https://msticapp.kusto.windows.net + data_families: [hostdata.DeviceEvents] + cluster: https://uscluster.kusto.windows.net -Queries using either of these metadata sections would be accessed and run as follows: +For this configuration the database used is ``DeviceEvents`` and the +queries will also be grouped under the DeviceEvents container. -.. code:: ipython3 - - kql_prov.DeviceEvents.list_host_processes(host_name="my_host", ...) - -The file-level ``metadata`` section applies to all queries in the file by -default. You can specify a metadata section for individual queries. Any -settings here will override the file-level settings. - -The example below shows overriding the ``data_families`` and ``cluster`` -entries for an individual query. - -.. code:: yaml +.. code-block:: yaml + :emphasize-lines: 5 + # Deprecated format metadata: version: 1 description: Kusto Queries data_environments: [Kusto] - data_families: [DeviceEvents.hostdata] - cluster: https://msticapp.kusto.windows.net - tags: ["user"] - defaults: - parameters: - table: - description: Table name - type: str - default: "DeviceProcessEvents" - # ... - sources: - list_host_processes: - description: Lists all process creations for a host - metadata: - data_families: [DeviceEvents.scrubbeddata] - cluster: https://msticapp.kusto.windows.net - args: - query: ' - {table} - | where Timestamp >= datetime({start}) - | where Timestamp <= datetime({end}) - | where DeviceName has "{host_name}" - {add_query_items}' - uri: None - parameters: - host_name: - description: Name of host - type: str + data_families: [DeviceEvents] + cluster: https://uscluster.kusto.windows.net -Loading a QueryProvider for Kusto ---------------------------------- +.. 
note:: When using the ``data_families`` entry to specify + the database name, only the first entry in the list is used + for this. Subsequent items still work for creating + data query groupings. -.. code:: ipython3 +Kusto cluster specifier +~~~~~~~~~~~~~~~~~~~~~~~ - kql_prov = QueryProvider("Kusto") +Adding a cluster specifier matches queries to the right cluster +and prevents a query from being used with +a cluster and database for which it was not intended. +You can specify the cluster to use in three ways: + +* Including a ``cluster_groups`` item in the metadata section. + This is a list of cluster group names that are defined in the + ``msticpyconfig.yaml`` file. Queries with one or more ``cluster_groups`` + entries can be used against any of the cluster definitions in + ``msticpyconfig.yaml`` that have matching cluster group names. +* Including a ``clusters`` item in the metadata section. + This is a list of cluster identifiers (URIs, names or configuration section names + that are defined in the ``msticpyconfig.yaml`` file). These queries + can be used with any cluster configuration entry that matches one + of the IDs in the ``clusters`` item. +* Including a ``cluster`` item in the metadata section. + This is a single cluster identifier (URI, name or configuration section name + that is defined in the ``msticpyconfig.yaml`` file). These queries + can only be used with the cluster configuration entry that matches + the ID in the ``cluster`` item. -Connecting to a Kusto cluster ------------------------------ +The cluster specifiers are used in the order above until a match is found. +You can include more than one cluster specifier in a query definition file. +If no match is found, the query will not be added to the query provider. -If you are using query files (as described above) you do not need to explicitly -connect - the connection will be made dynamically using the parameters in the -query definition. +.. 
note:: For queries that have no cluster specifier, they will + be added to the query provider but may not work. -To run add-hoc queries however, you need to explicitly connect to a cluster and -database. The parameters required for connection to a Kusto cluster can be passed in -a number of ways. You can provide a full connection string or parameters -for ``cluster`` and ``database``. In the latter case, you must have configured -settings for the cluster defined in your msticpyconfig.yaml. +.. tip:: If you want to avoid these queries being added use + the parameter ``strict_query_match=True`` when + creating the Kusto QueryProvider as shown in the following + example -The ``cluster`` name can be either the actual cluster name or the alias -that you used in your settings (i.e. the ``INSTANCE`` value in ``Kusto-INSTANCE`` -configuration key). To connect, you must also specify a valid database -name in the cluster. +.. code:: python3 + import msticpy as mp + kql_prov = mp.QueryProvider("Kusto_New", strict_query_match=True) -.. code:: ipython3 - kql_prov.connect(cluster="msticapp", database="hostdata") +The following examples show the different ways of configuring +clusters to match queries: -If you have queries defined (in template files) for multiple clusters -and databases, you do not need to connect explicitly to each one. -You can call these queries by name - the driver will dynamically -read the connection parameters from the query file and attempt -to authenticate to the cluster. +.. code-block:: yaml + :emphasize-lines: 6,7 -Additional Kusto query parameters ---------------------------------- + metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [hostdata] + cluster_groups: + - Group1 + database: DeviceEvents + +.. code-block:: yaml + :emphasize-lines: 6,7 -You can override the cluster and database for an individual -query by supply the ``cluster`` and/or ``database`` parameters -as query parameters. 
+ metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [hostdata] + clusters: + - https://uscluster.kusto.windows.net + - https://eucluster.kusto.windows.net + database: DeviceEvents +.. code-block:: yaml + :emphasize-lines: 6 -.. code:: ipython3 + metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [hostdata] + cluster: https://uscluster.kusto.windows.net + database: DeviceEvents - kql_prov.DeviceEvents.list_host_processes( - host_name="my_host", - cluster="https://somecluster.kusto.windows.net", - database="archive" - ... - ) +.. note:: You can also use cluster specifiers (using the same syntax + as shown above) for individual query metadata. Each query has + its own optional ``metadata`` sub-key. Setting cluster + specifiers at the query level, with different queries assigned + to different clusters in the same file may make organizing + your queries more difficult, so we recommend only using + cluster specifiers at the file level. However, it is possible + to do this if you need to. + + +Logical flow used to determine if a query is shown +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This flowchart shows the logic applied using the query definition +and configuration parameters to determine whether a query is +shown or not (i.e. whether it appears in ``list_queries()`` and +is attached to the QueryProvider as a query function). +.. figure:: _static/kusto_query_display.png + :alt: Flow chart showing how queries are filtered based on query metadata + and configuration settings. 
+ :height: 5in Other Kusto Documentation ----------------------------------- For examples of using the Kusto provider, see the samples -`Kusto Analysis Notebook` -and `Kusto Ingest Notebook` +`Kusto Analysis Notebook `__ +and `Kusto Ingest Notebook `__ -:py:mod:`Kusto driver API documentation` \ No newline at end of file +:py:mod:`Kusto driver API documentation` diff --git a/docs/source/data_acquisition/DataProv-MSSentinel-Legacy.rst b/docs/source/data_acquisition/DataProv-MSSentinel-Legacy.rst new file mode 100644 index 000000000..db670b07f --- /dev/null +++ b/docs/source/data_acquisition/DataProv-MSSentinel-Legacy.rst @@ -0,0 +1,254 @@ +Microsoft Sentinel Provider - Legacy Version +============================================ + +.. warning:: This provider has been replaced by one based on the + Azure Monitor SDK. Unless you have a specific need to use this + version, we recommend that you use the new version. See + :doc:`../data_acquisition/DataProv-MSSentinel` for more + information. + +.. note:: This provider is still supported but will not be updated + with new features. + In order to use it you need to have KqlMagic installed. This is + no longer installed by default with *MSTICPy*. + You can install this with ``pip install Kqlmagic`` or + ``pip install msticpy[kql]``. + +Sentinel Configuration +---------------------- + +You can store configuration for your workspace (or workspaces) in either +your ``msticpyconfig.yaml`` or a ``config.json`` file. The latter +file is auto-created in your Azure Machine Learning (AML) workspace when +you launch a notebook from the Sentinel portal. It can, however, only +store details for a single workspace. + +Sentinel Configuration in *msticpyconfig.yaml* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the simplest place to store your workspace details. + +You likely need to use a *msticpyconfig.yaml* anyway. 
If you are using other +*msticpy* features such as Threat Intelligence Providers, GeoIP Lookup, Azure Data, +etc., these all have their own configuration settings, so using a single +configuration file makes managing your settings easier. If you are running +notebooks in an AML workspace and you do not have a *msticpyconfig.yaml* +*MSTICPy* will create one and import settings from a *config.json*, if it can find +one. + +For more information on using and configuring *msticpyconfig.yaml* see +:doc:`msticpy Package Configuration <../getting_started/msticpyconfig>` +and :doc:`MSTICPy Settings Editor<../getting_started/SettingsEditor>` + +The MS Sentinel connection settings are stored in the +``AzureSentinel\\Workspaces`` section of the file. Here is an example. + +.. code:: yaml + + AzureSentinel: + Workspaces: + # Workspace used if you don't explicitly name a workspace when creating WorkspaceConfig + # Specifying values here overrides config.json settings unless you explicitly load + # WorkspaceConfig with config_file parameter (WorkspaceConfig(config_file="../config.json") + Default: + WorkspaceId: 271f17d3-5457-4237-9131-ae98a6f55c37 + TenantId: 335b56ab-67a2-4118-ac14-6eb454f350af + ResourceGroup: soc + SubscriptionId: a5b24e23-a96a-4472-b729-9e5310c83e20 + WorkspaceName: Workspace1 + # To use these launch with an explicit name - WorkspaceConfig(workspace_name="Workspace2") + Workspace1: + WorkspaceId: "c88dd3c2-d657-4eb3-b913-58d58d811a41" + TenantId: "335b56ab-67a2-4118-ac14-6eb454f350af" + ResourceGroup: soc + SubscriptionId: a5b24e23-a96a-4472-b729-9e5310c83e20 + WorkspaceName: Workspace1 + TestWorkspace: + WorkspaceId: "17e64332-19c9-472e-afd7-3629f299300c" + TenantId: "4ea41beb-4546-4fba-890b-55553ce6003a" + ResourceGroup: soc + SubscriptionId: a5b24e23-a96a-4472-b729-9e5310c83e20 + WorkspaceName: Workspace2 + +If you only use a single workspace, you only need to create a ``Default`` entry and +add the values for your *WorkspaceID* and *TenantID*. 
You can add other entries here, +for example, SubscriptionID, ResourceGroup. These are recommended but not required +for the QueryProvider (they may be used by other *MSTICPy* components however). + +.. note:: The property names are spelled differently to the values in the + *config.json* so be sure to enter these as shown in the example. These + names are case-sensitive. + +.. note:: The section names (Default, Workspace1 and TestWorkspace) do + not have to be the same as the workspace name - you can choose friendlier + aliases, if you wish. + +If you use multiple workspaces, you can add further entries here. Each +workspace entry is normally the name of the Azure Sentinel workspace but +you can use any name you prefer. + +Sentinel Configuration in *config.json* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When you load a notebook from the MS Sentinel UI a configuration file *config.json* +is provisioned for you with the details of the source workspace populated in +the file. An example is shown here. + +.. code:: json + + { + "tenant_id": "335b56ab-67a2-4118-ac14-6eb454f350af", + "subscription_id": "b8f250f8-1ba5-4b2c-8e74-f7ea4a1df8a6", + "resource_group": "ExampleWorkspaceRG", + "workspace_id": "271f17d3-5457-4237-9131-ae98a6f55c37", + "workspace_name": "ExampleWorkspace" + } + +If no *msticpyconfig.yaml* is found *MSTICPy* will automatically look for a +*config.json* file in the current +directory. If not found here, it will search the parent directory and in all +its subdirectories. It will use the first *config.json* file found. + + +Loading a QueryProvider for Microsoft Sentinel +---------------------------------------------- + +.. code:: ipython3 + + qry_prov = QueryProvider( + data_environment="MSSentinel_Legacy", + ) + + +Connecting to a MS Sentinel Workspace +------------------------------------- + +Once we have instantiated the QueryProvider we need to authenticate to Sentinel +Workspace. 
This is done by calling the connect() function of the Query +Provider. + +connect() requires a connection string as its parameter. For MS Sentinel +we can use the ``WorkspaceConfig`` class. + +WorkspaceConfig +~~~~~~~~~~~~~~~ + +.. note:: From v2.0.0 of MSTICPy the MS Sentinel QueryProvider + will automatically create a WorkspaceConfig from your settings. + Simply call ``connect`` with a ``workspace="YourWorkspace"`` parameter + + +``WorkspaceConfig`` handles loading your workspace configuration and generating a +connection string from your configuration. +See :py:mod:`WorkspaceConfig API documentation` + +``WorkspaceConfig`` works with workspace configuration stored in *msticpyconfig.yaml* +or *config.json* (although the former takes precedence). + +To use ``WorkspaceConfig``, simply create an instance of it. It will automatically build +your connection string for use with the query provider library. + +.. code:: IPython + + >>> ws_config = WorkspaceConfig() + >>> ws_config.code_connect_str + + "loganalytics://code().tenant('335b56ab-67a2-4118-ac14-6eb454f350af').workspace('271f17d3-5457-4237-9131-ae98a6f55c37')" + +You can use this connection string in the call to ``QueryProvider.connect()`` + +When called without parameters, *WorkspaceConfig* loads the "Default" +entry in your *msticpyconfig.yaml* (or falls back to loading the settings +in *config.json*). To specify a different workspace pass the ``workspace`` parameter +with the name of your workspace entry. This value is the name of +the section in the *msticpyconfig* ``Workspaces`` section, which may +not necessarily be the same as your workspace name. + +.. code:: IPython + + >>> ws_config = WorkspaceConfig(workspace="TestWorkspace") + + +To see which workspaces are configured in your *msticpyconfig.yaml* use +the ``list_workspaces()`` function. + +.. tip:: ``list_workspaces`` is a class function, so you do not need to + instantiate a WorkspaceConfig to call this function. + +.. 
code:: IPython + + >>> WorkspaceConfig.list_workspaces() + + {'Default': {'WorkspaceId': '271f17d3-5457-4237-9131-ae98a6f55c37', + 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, + 'Workspace1': {'WorkspaceId': 'c88dd3c2-d657-4eb3-b913-58d58d811a41', + 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, + 'TestWorkspace': {'WorkspaceId': '17e64332-19c9-472e-afd7-3629f299300c', + 'TenantId': '4ea41beb-4546-4fba-890b-55553ce6003a'}} + +Entries in msticpyconfig always take precedence over settings in your +config.json. If you want to force use of the config.json, specify the path +to the config.json file in the ``config_file`` parameter to ``WorkspaceConfig``. + +If you need to use a specific instance of a config.json you can specify a full +path to the file you want to use when you create your ``WorkspaceConfig`` +instance. + + +Connecting to the workspace +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When connecting you can just pass the name of your workspace or +an instance of WorkspaceConfig to the query provider's ``connect`` method. + +.. code:: IPython + + qry_prov.connect(workspace="Default") + qry_prov.connect(workspace="MyOtherWorkspace") + + # or, passing WorkspaceConfig + qry_prov.connect(WorkspaceConfig()) + # or + qry_prov.connect(WorkspaceConfig(workspace="MyOtherWorkspace")) + + + +MS Sentinel Authentication options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, the data provider tries to use chained authentication, +attempting to use existing Azure credentials, if they are available. + +- If you are running in an AML workspace, it will attempt to use + integrated MSI authentication, using the identity that you used to + authenticate to AML. +- If you have logged in to Azure CLI, the Sentinel provider will + try to use your AzureCLI credentials +- If you have your credentials stored as environment variables, it + will try to use those +- Finally, it will fall back on using interactive browser-based + device authentication. 
+ +If you are using a Sovereign cloud rather than the Azure global cloud, +you should select the appropriate cloud in the Azure section of +the *msticpyconfig*. + +.. warning:: Although msticpy allows you to configure multiple entries for + workspaces in different tenants, you cannot currently authenticate to workspaces + that span multiple tenants in the same notebook. If you need to do this, you + should investigate + `Azure Lighthouse `__. + This allows delegated access to workspaces in multiple tenants from a single + tenant. + +For more details on Azure authentication see :doc:`../getting_started/AzureAuthentication`. + +Other MS Sentinel Documentation +------------------------------- + +For examples of using the MS Defender provider, see the sample +`M365 Defender Notebook` + +Built-in :ref:`data_acquisition/DataQueries:Queries for Microsoft Sentinel`. + +:py:mod:`Sentinel KQL driver API documentation` diff --git a/docs/source/data_acquisition/DataProv-MSSentinel-New.rst b/docs/source/data_acquisition/DataProv-MSSentinel-New.rst deleted file mode 100644 index 4abe5ccab..000000000 --- a/docs/source/data_acquisition/DataProv-MSSentinel-New.rst +++ /dev/null @@ -1,311 +0,0 @@ -Microsoft Sentinel Provider - New Implementation -================================================ - -This is a new implementation of the MS Sentinel QueryProvider using -the -`azure-monitor-query SDK `__ -(the earlier implementation used -`Kqlmagic `__) - -.. warning:: This provider currently in beta and is available for testing. - It is available alongside the existing Sentinel provider for you - to compare old and new. To use it you will need the ``azure-monitor-query`` - package installed. You can install this with ``pip install azure-monitor-query`` - or ``pip install msticpy[azure_query]``. 
- If you are using the existing implementation, see :doc:`./DataProv-MSSentinel` - -Changes from the previous implementation ----------------------------------------- - -* Use the provider name ``MSSentinel_New`` when creating a QueryProvider - instance. -* By default, it uses the *MSTICPy* built-in Azure authentication by - default - you do not have to specify parameters to enable this. -* Supports simultaneous queries against multiple workspaces (see below). -* Supports user-specified timeout for queries. -* Supports proxies (via MSTICPy config or the ``proxies`` parameter to - the ``connect`` method) -* The driver supports asynchronous execution of queries. This is used - when you create a Query provider with multiple connections (e.g. - to different clusters) and when you split queries into time chunks. - See :ref:`multiple_connections` and :ref:`splitting_query_execution` for - for more details. This is independent of the ability to specify - multiple workspaces in a single connection as described above. -* Some of the previous parameters have been deprecated: - - * ``mp_az_auth`` is replaced by ``auth_types`` (the former still works - but will be removed in a future release). - * ``mp_az_auth_tenant_id`` is replaced by ``tenant_id`` (the former - is no longer supported - - -Sentinel Configuration ----------------------- - -You store configuration for your workspace (or workspaces) in -your ``msticpyconfig.yaml``. - -For more information on using and configuring *msticpyconfig.yaml* see -:doc:`msticpy Package Configuration <../getting_started/msticpyconfig>` -and :doc:`MSTICPy Settings Editor<../getting_started/SettingsEditor>` - -The MS Sentinel connection settings are stored in the -``AzureSentinel\\Workspaces`` section of the file. -Here is an example. - -.. 
code:: yaml - - AzureSentinel: - Workspaces: - # Workspace used if you don't explicitly name a workspace when creating WorkspaceConfig - # Specifying values here overrides config.json settings unless you explicitly load - # WorkspaceConfig with config_file parameter (WorkspaceConfig(config_file="../config.json") - Default: - WorkspaceId: 271f17d3-5457-4237-9131-ae98a6f55c37 - TenantId: 335b56ab-67a2-4118-ac14-6eb454f350af - ResourceGroup: soc - SubscriptionId: a5b24e23-a96a-4472-b729-9e5310c83e20 - WorkspaceName: Workspace1 - # To use these launch with an explicit name - WorkspaceConfig(workspace_name="Workspace2") - Workspace1: - WorkspaceId: "c88dd3c2-d657-4eb3-b913-58d58d811a41" - TenantId: "335b56ab-67a2-4118-ac14-6eb454f350af" - ResourceGroup: soc - SubscriptionId: a5b24e23-a96a-4472-b729-9e5310c83e20 - WorkspaceName: Workspace1 - TestWorkspace: - WorkspaceId: "17e64332-19c9-472e-afd7-3629f299300c" - TenantId: "4ea41beb-4546-4fba-890b-55553ce6003a" - ResourceGroup: soc - SubscriptionId: a5b24e23-a96a-4472-b729-9e5310c83e20 - WorkspaceName: Workspace2 - -If you only use a single workspace, you only need to create a ``Default`` entry and -add the values for your *WorkspaceID* and *TenantID*. You can add other entries here, -for example, SubscriptionID, ResourceGroup. These are not required for the data -queries but are recommended since they are used by other *MSTICPy* components. - -If you use multiple workspaces, you can add further entries here. The key for -each entry (e.g. ``Workspace1`` or ``TestWorkspace`` in the example above) -is normally the name of the Azure Sentinel workspace but -you can use any name you prefer. You use this entry name when connecting -to a workspace. - - -Loading a QueryProvider for Microsoft Sentinel ----------------------------------------------- - -.. 
code:: ipython3 - - qry_prov = QueryProvider( - data_environment="MSSentinel_New", - ) - - # or just - qry_prov = QueryProvider("MSSentinel_New") - -Optional parameters for the Sentinel QueryProvider -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``timeout`` : int (seconds) - -Specify a timeout for queries. Default is 300 seconds. -This parameter can be set here or in the ``connect`` method -and overridden for individual queries. - -``proxies`` : Dict[str, str] - -Proxy settings for log analytics queries. -If proxies are configured in *msticpyconfig.yaml* this is used by default. -If specified as a parameter, specify proxies as a dictionary of the form -``{protocol: proxy_url}`` - -The only protocol used by the driver is "https" (other protocols -can be set in *msticpyconfig.yaml* but only https is used here). -The proxy_url can contain -optional authentication information in the format -"https://username:password@proxy_host:port" - -If you have a proxy configuration set in *msticpyconfig.yaml* and -you do not want to use it, set ``proxies`` to None or an empty dictionary. -This parameter can be overridden in connect method. - -Connecting to a MS Sentinel Workspace -------------------------------------- - -Once you've created a QueryProvider you need to authenticate to Sentinel -Workspace. This is done by calling the connect() function of the Query -Provider. See -:py:meth:`connect() ` - -This function takes an initial parameter (called ``connection_str`` for -historical reasons) that can be one of the following: - -* A WorkspaceConfig instance -* A connection string (this is option is being deprecated) -* None - in this case it will connect with the ``Default`` entry from - your *msticpyconfig.yaml* file. - -If you omit this parameter you use the ``workspace`` parameter -to specify the workspace entry from ``msticpyconfig.yaml`` to use. 
- - -Connecting to a Sentinel workspace -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When connecting you can just pass the name of your workspace or -an instance of WorkspaceConfig to the query provider's ``connect`` method. - -.. code:: IPython - - qry_prov.connect(workspace="Default") - qry_prov.connect(workspace="MyOtherWorkspace") - - # or, passing WorkspaceConfig - qry_prov.connect(WorkspaceConfig()) - # or - qry_prov.connect(WorkspaceConfig(workspace="MyOtherWorkspace")) - - -MS Sentinel Authentication options -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -By default, the data provider will use Azure authentication -following the parameters defined in your ``msticpyconfig.yaml`` file -(or the default values if you have not configured them in this file). - -To read more about Azure authentication see -:doc:`Azure Authentication <../getting_started/AzureAuthentication>` - -You can override several authentication parameters including: - -* auth_types - a list of authentication types to try in order -* tenant_id - the Azure tenant ID to use for authentication - -If you are using a Sovereign cloud rather than the Azure global cloud, -you should follow the guidance in -:doc:`Azure Authentication <../getting_started/AzureAuthentication>` -to configure the correct cloud. - - - -Connecting to multiple Sentinel workspaces -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There are two mechanisms for querying multiple MS Sentinel workspaces. -One is a generic method common to all data providers. For more -information on this see :ref:`multiple_connections` in the main -Data Providers documentation. - -The other is specific to the Sentinel data provider and is provided -by the underlying Azure Monitor client. This latter capability is described in -this section. - -The Sentinel data provider supports connecting to multiple workspaces by -passing a list of workspace names or workspace IDs to the ``connect`` method. -using the ``workspaces`` or ``workspace_ids`` parameters respectively. 
- -``workspace_ids`` should be a list or tuple of workspace IDs. - -``workspaces`` should be a list or tuple of workspace names. In order -to use this parameter you must have these workspaces configured in -your *msticpyconfig.yaml*. - -These parameters override the ``workspace`` parameter. - -Connecting to multiple workspaces allows you to run queries across these -workspaces and return the combined results as a single Pandas DataFrame. -The workspaces must use common authentication credentials and are -expected to have the same data schema. - -.. code:: ipython3 - - qry_prov.connect(workspaces=["Default", "MyOtherWorkspace"]) - - qry_prov.SecurityAlert.list_alerts() - -This will return a DataFrame containing the results of the query, -the results from each workspace will be indicated by the -``TenantId`` column, which will contain the workspace ID of -each workspace. - -.. note:: This is a mechanism implemented by the underlying - **azure-monitor-query** - client library. It is independent of the MSTICPy capability to - add multiple connections to a query provider (and run parallel - queries against each workspace). You can use either of these - but we recommended using - one or the other and not both simultaneously. - -.. warning:: Connecting to multiple workspaces like this means - that the ``schema`` property will not return anything. This - only works if you connect to a single workspace. In this case, - it will return the schema of this workspace. - - -Other parameters for Sentinel ``connect()`` method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For ``timeout`` and ``proxies`` see the section above. - -After connecting to -The WorkspaceConfig class -------------------------- - -You do not need to know the details of this class but it is used -behind the scenes to provide workspace configuration information -to the Sentinel data provider. 
- -``WorkspaceConfig`` handles loading your workspace configuration -and generating a connection string from your configuration. -See :py:mod:`WorkspaceConfig API documentation` - -``WorkspaceConfig`` works with workspace configuration stored in *msticpyconfig.yaml*. - -To use ``WorkspaceConfig``, simple create an instance of it. It will automatically build -your connection string for use with the query provider library. - -.. code:: python3 - - ws_config = WorkspaceConfig() - -When called without parameters, *WorkspaceConfig* loads the "Default" -entry in your *msticpyconfig.yaml*. To specify a different workspace pass the ``workspace`` parameter -with the name of your workspace entry. This value is the name of -the section in the ``msticpyconfig.yaml`` ``Workspaces`` section. - -.. note:: the ``workspace`` parameter value is the entry heading in - your ``msticpyconfig.yaml``. As mentioned above, this may - not necessarily be the same as your workspace name. - -.. code:: python3 - - ws_config = WorkspaceConfig(workspace="TestWorkspace") - - -To see which workspaces are configured in your *msticpyconfig.yaml* use -the ``list_workspaces()`` function. - -.. tip:: ``list_workspaces`` is a class function, so you do not need to - instantiate a WorkspaceConfig to call this function. - -.. code:: python3 - - WorkspaceConfig.list_workspaces() - -.. parsed-literal:: - - {'Default': {'WorkspaceId': '271f17d3-5457-4237-9131-ae98a6f55c37', - 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, - 'Workspace1': {'WorkspaceId': 'c88dd3c2-d657-4eb3-b913-58d58d811a41', - 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, - 'TestWorkspace': {'WorkspaceId': '17e64332-19c9-472e-afd7-3629f299300c', - 'TenantId': '4ea41beb-4546-4fba-890b-55553ce6003a'}} - - -Other MS Sentinel Documentation -------------------------------- - -Built-in :ref:`data_acquisition/DataQueries:Queries for Microsoft Sentinel`. 
- -See also: :py:mod:`Sentinel KQL driver API documentation ` diff --git a/docs/source/data_acquisition/DataProv-MSSentinel.rst b/docs/source/data_acquisition/DataProv-MSSentinel.rst index aa10109cd..52f40ccde 100644 --- a/docs/source/data_acquisition/DataProv-MSSentinel.rst +++ b/docs/source/data_acquisition/DataProv-MSSentinel.rst @@ -1,34 +1,57 @@ Microsoft Sentinel Provider =========================== -Sentinel Configuration ----------------------- - -You can store configuration for your workspace (or workspaces) in either -your ``msticpyconfig.yaml`` or a ``config.json`` file. The latter -file is auto-created in your Azure Machine Learning (AML) workspace when -you launch a notebook from the Sentinel portal. It can however, only -store details for a single workspace. +The MS Sentinel QueryProvider uses +the +`azure-monitor-query SDK `__ +to connect to Microsoft Sentinel workspaces. + +.. note:: This provider replaces an earlier version, + which used KqlMagic as the underlying data connector. + The previous driver is still available but to use it you must + specify ``MSSentinel_Legacy`` as the provider name when creating + the QueryProvider instance. + + For more information about the previous driver see + :doc:`./DataProv-MSSentinel-Legacy` + + +Changes from the previous implementation +---------------------------------------- + +* Uses the *MSTICPy* built-in Azure authentication by + default - you do not have to specify parameters to enable this. +* Supports simultaneous queries against multiple workspaces (see below). +* Supports user-specified timeout for queries. +* Supports proxies (via MSTICPy config or the ``proxies`` parameter to + the ``connect`` method) +* The driver supports asynchronous execution of queries. This is used + when you create a Query provider with multiple connections (e.g. + to different clusters) and when you split queries into time chunks. + See :ref:`multiple_connections` and :ref:`splitting_query_execution` + for more details.
This is independent of the ability to specify + multiple workspaces in a single connection as described above. +* Some of the previous parameters have been deprecated: + + * ``mp_az_auth`` is replaced by ``auth_types`` (the former still works + but will be removed in a future release). + * ``mp_az_auth_tenant_id`` is replaced by ``tenant_id`` (the former + is no longer supported). -Sentinel Configuration in *msticpyconfig.yaml* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This is the simplest place to store your workspace details. +Sentinel Configuration +---------------------- -You likely need to use a *msticpyconfig.yaml* anyway. If you are using other -*msticpy* features such as Threat Intelligence Providers, GeoIP Lookup, Azure Data, -etc., these all have their own configuration settings, so using a single -configuration file makes managing your settings easier. If you are running -notebooks in an AML workspace and you do not have a *msticpyconfig.yaml* -*MSTICPy* will create one and import settings from a *config.json*, if it can find -one. +You store configuration for your workspace (or workspaces) in +your ``msticpyconfig.yaml``. For more information on using and configuring *msticpyconfig.yaml* see :doc:`msticpy Package Configuration <../getting_started/msticpyconfig>` and :doc:`MSTICPy Settings Editor<../getting_started/SettingsEditor>` The MS Sentinel connection settings are stored in the -``AzureSentinel\\Workspaces`` section of the file. Here is an example. +``AzureSentinel\\Workspaces`` section of the file. +Here is an example. .. code:: yaml @@ -59,186 +82,231 @@ The MS Sentinel connection settings are stored in the If you only use a single workspace, you only need to create a ``Default`` entry and add the values for your *WorkspaceID* and *TenantID*. You can add other entries here, -for example, SubscriptionID, ResourceGroup. These are recommended but not required -for the QueryProvider (they may be used by other *MSTICPy* components however).
- -.. note:: The property names are spelled differently to the values in the - *config.json* so be sure to enter these as shown in the example. These - names are case-sensitive. +for example, SubscriptionID, ResourceGroup. These are not required for the data +queries but are recommended since they are used by other *MSTICPy* components. -.. note:: The section names (Default, Workspace1 and TestWorkspace) do - not have to be the same as the workspace name - you can choose friendlier - aliases, if you wish. +If you use multiple workspaces, you can add further entries here. The key for +each entry (e.g. ``Workspace1`` or ``TestWorkspace`` in the example above) +is normally the name of the Azure Sentinel workspace but +you can use any name you prefer. You use this entry name when connecting +to a workspace. -If you use multiple workspaces, you can add further entries here. Each -workspace entry is normally the name of the Azure Sentinel workspace but -you can use any name you prefer. -Sentinel Configuration in *config.json* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Loading a QueryProvider for Microsoft Sentinel +---------------------------------------------- -When you load a notebook from the MS Sentinel UI a configuration file *config.json* -is provisioned for you with the details of the source workspace populated in -the file. An example is shown here. +.. code:: ipython3 -.. code:: json + qry_prov = QueryProvider( + data_environment="MSSentinel_New", + ) - { - "tenant_id": "335b56ab-67a2-4118-ac14-6eb454f350af", - "subscription_id": "b8f250f8-1ba5-4b2c-8e74-f7ea4a1df8a6", - "resource_group": "ExampleWorkspaceRG", - "workspace_id": "271f17d3-5457-4237-9131-ae98a6f55c37", - "workspace_name": "ExampleWorkspace" - } + # or just + qry_prov = QueryProvider("MSSentinel_New") -If no *msticpyconfig.yaml* is found *MSTICPy* will automatically look for a -*config.json* file in the current -directory. 
If not found here, it will search the parent directory and in all -its subdirectories. It will use the first *config.json* file found. +Optional parameters for the Sentinel QueryProvider +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``timeout`` : int (seconds) -Loading a QueryProvider for Microsoft Sentinel ----------------------------------------------- +Specify a timeout for queries. Default is 300 seconds, +the maximum is 600 seconds (10 minutes). +This parameter can be set here or in the ``connect`` method +and overridden for individual queries. -.. code:: ipython3 +``proxies`` : Dict[str, str] - qry_prov = QueryProvider( - data_environment="MSSentinel", - ) +Proxy settings for log analytics queries. +If proxies are configured in *msticpyconfig.yaml* this is used by default. +If specified as a parameter, specify proxies as a dictionary of the form +``{protocol: proxy_url}`` -.. note::"LogAnalytics" and "AzureSentinel" are also aliases - for "MSSentinel" +The only protocol supported by the driver is "https" (other protocols +can be set in *msticpyconfig.yaml* but only https is used here). +The proxy_url can contain +optional authentication information in the format +"https://username:password@proxy_host:port" +If you have a proxy configuration set in *msticpyconfig.yaml* and +you do not want to use it, set ``proxies`` to None or an empty dictionary. +This parameter can be overridden in connect method. Connecting to a MS Sentinel Workspace ------------------------------------- -Once we have instantiated the QueryProvider we need to authenticate to Sentinel +Once you've created a QueryProvider you need to authenticate to Sentinel Workspace. This is done by calling the connect() function of the Query -Provider. - -connect() requires a connection string as its parameter. For MS Sentinel -we can use the ``WorkspaceConfig`` class. +Provider. 
See +:py:meth:`connect() ` -WorkspaceConfig -~~~~~~~~~~~~~~~ +This function takes an initial parameter (called ``connection_str`` for +historical reasons) that can be one of the following: -.. note:: From v2.0.0 of MSTICPy the MS Sentinel QueryProvider - will automatically create a WorkspaceConfig from your settings. - Simply call ``connect`` with a ``workspace="YourWorkspace"`` parameter +* A WorkspaceConfig instance +* A connection string (this option is being deprecated) +* None - in this case it will connect with the ``Default`` entry from + your *msticpyconfig.yaml* file. +If you omit this parameter you use the ``workspace`` parameter +to specify the workspace entry from ``msticpyconfig.yaml`` to use. -``WorkspaceConfig`` handles loading your workspace configuration and generating a -connection string from your configuration. -See :py:mod:`WorkspaceConfig API documentation` -``WorkspaceConfig`` works with workspace configuration stored in *msticpyconfig.yaml* -or *config.json* (although the former takes precedence). +Connecting to a Sentinel workspace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To use ``WorkspaceConfig``, simple create an instance of it. It will automatically build -your connection string for use with the query provider library. +When connecting you can just pass the name of your workspace or +an instance of WorkspaceConfig to the query provider's ``connect`` method. ..
code:: IPython - >>> ws_config = WorkspaceConfig() - >>> ws_config.code_connect_str + qry_prov.connect("Default") + qry_prov.connect(workspace="Default") + qry_prov.connect(workspace="MyOtherWorkspace") - "loganalytics://code().tenant('335b56ab-67a2-4118-ac14-6eb454f350af').workspace('271f17d3-5457-4237-9131-ae98a6f55c37')" + # or, passing WorkspaceConfig + qry_prov.connect(WorkspaceConfig()) + # or + qry_prov.connect(WorkspaceConfig(workspace="MyOtherWorkspace")) -You can use this connection string in the call to ``QueryProvider.connect()`` -When called without parameters, *WorkspaceConfig* loads the "Default" -entry in your *msticpyconfig.yaml* (or falls back to loading the settings -in *config.json*). To specify a different workspace pass the ``workspace`` parameter -with the name of your workspace entry. This value is the name of -the section in the *msticpyconfig* ``Workspaces`` section, which may -not necessarily be the same as your workspace name. +MS Sentinel Authentication options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. code:: IPython +By default, the data provider will use Azure authentication +following the parameters defined in your ``msticpyconfig.yaml`` file +(or the default values if you have not configured them in this file). - >>> ws_config = WorkspaceConfig(workspace="TestWorkspace") +To read more about Azure authentication see +:doc:`Azure Authentication <../getting_started/AzureAuthentication>` +You can override several authentication parameters including: -To see which workspaces are configured in your *msticpyconfig.yaml* use -the ``list_workspaces()`` function. +* auth_types - a list of authentication types to try in order +* tenant_id - the Azure tenant ID to use for authentication -.. tip:: ``list_workspaces`` is a class function, so you do not need to - instantiate a WorkspaceConfig to call this function. 
+If you are using a Sovereign cloud rather than the Azure global cloud, +you should follow the guidance in +:doc:`Azure Authentication <../getting_started/AzureAuthentication>` +to configure the correct cloud. -.. code:: IPython - >>> WorkspaceConfig.list_workspaces() +Connecting to multiple Sentinel workspaces +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - {'Default': {'WorkspaceId': '271f17d3-5457-4237-9131-ae98a6f55c37', - 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, - 'Workspace1': {'WorkspaceId': 'c88dd3c2-d657-4eb3-b913-58d58d811a41', - 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, - 'TestWorkspace': {'WorkspaceId': '17e64332-19c9-472e-afd7-3629f299300c', - 'TenantId': '4ea41beb-4546-4fba-890b-55553ce6003a'}} +There are two mechanisms for querying multiple MS Sentinel workspaces. +One is a generic method common to all data providers. For more +information on this see :ref:`multiple_connections` in the main +Data Providers documentation. -Entries in msticpyconfig always take precedence over settings in your -config.json. If you want to force use of the config.json, specify the path -to the config.json file in the ``config_file`` parameter to ``WorkspaceConfig``. +The other is specific to the Sentinel data provider and is provided +by the underlying Azure Monitor client. This latter capability is described in +this section. -If you need use a specific instance of a config.json you can specify a full -path to the file you want to use when you create your ``WorkspaceConfig`` -instance. +The Sentinel data provider supports connecting to multiple workspaces by +passing a list of workspace names or workspace IDs to the ``connect`` method +using the ``workspaces`` or ``workspace_ids`` parameters respectively. +``workspace_ids`` should be a list or tuple of workspace IDs. -Connecting to the workspace -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``workspaces`` should be a list or tuple of workspace names.
In order +to use this parameter you must have these workspaces configured in +your *msticpyconfig.yaml*. -When connecting you can just pass the name of your workspace or -an instance of WorkspaceConfig to the query provider's ``connect`` method. +These parameters override the ``workspace`` parameter. -.. code:: IPython +Connecting to multiple workspaces allows you to run queries across these +workspaces and return the combined results as a single Pandas DataFrame. +The workspaces must use common authentication credentials and are +expected to have the same data schema. - qry_prov.connect(workspace="Default") - qry_prov.connect(workspace="MyOtherWorkspace") +.. code:: ipython3 - # or, passing WorkspaceConfig - qry_prov.connect(WorkspaceConfig()) - # or - qry_prov.connect(WorkspaceConfig(workspace="MyOtherWorkspace")) + qry_prov.connect(workspaces=["Default", "MyOtherWorkspace"]) + qry_prov.SecurityAlert.list_alerts() +This will return a DataFrame containing the results of the query. +The results from each workspace will be indicated by the +``TenantId`` column, which will contain the workspace ID of +each workspace. -MS Sentinel Authentication options -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. note:: This is a mechanism implemented by the underlying + **azure-monitor-query** + client library. It is independent of the MSTICPy capability to + add multiple connections to a query provider (and run parallel + queries against each workspace). You can use either of these + but we recommend using + one or the other and not both simultaneously. -By default, the data provider tries to use chained authentication, -attempting to use existing Azure credentials, if they are available. +.. warning:: Connecting to multiple workspaces like this means + that the ``schema`` property will not return anything. This + only works if you connect to a single workspace. In this case, + it will return the schema of this workspace.
-- If you are running in an AML workspace, it will attempt to use - integrated MSI authentication, using the identity that you used to - authenticate to AML. -- If you have logged in to Azure CLI, the Sentinel provider will - try to use your AzureCLI credentials -- If you have your credentials stored as environment variables, it - will try to use those -- Finally, it will fall back on using interactive browser-based - device authentication. -If you are using a Sovereign cloud rather than the Azure global cloud, -you should select the appropriate cloud in the Azure section of -the *msticpyconfig*. +Other parameters for Sentinel ``connect()`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. warning:: Although msticpy allows you to configure multiple entries for - workspaces in different tenants, you cannot currently authenticate to workspaces - that span multiple tenants in the same notebook. If you need to do this, you - should investigate - `Azure Lighthouse `__. - This allows delegated access to workspaces in multiple tenants from a single - tenant. +For ``timeout`` and ``proxies`` see the section above. + + +The WorkspaceConfig class +------------------------- + +You do not need to know the details of this class but it is used +behind the scenes to provide workspace configuration information +to the Sentinel data provider. + +``WorkspaceConfig`` handles loading your workspace configuration +and generating a connection string from your configuration. +See :py:mod:`WorkspaceConfig API documentation` + +``WorkspaceConfig`` works with workspace configuration stored in *msticpyconfig.yaml*. + +To use ``WorkspaceConfig``, simply create an instance of it. It will automatically build +your connection string for use with the query provider library. + +.. code:: python3 + + ws_config = WorkspaceConfig() + +When called without parameters, *WorkspaceConfig* loads the "Default" +entry in your *msticpyconfig.yaml*.
To specify a different workspace pass the ``workspace`` parameter +with the name of your workspace entry. This value is the name of +the section in the ``msticpyconfig.yaml`` ``Workspaces`` section. + +.. note:: the ``workspace`` parameter value is the entry heading in + your ``msticpyconfig.yaml``. As mentioned above, this may + not necessarily be the same as your workspace name. + +.. code:: python3 + + ws_config = WorkspaceConfig(workspace="TestWorkspace") + + +To see which workspaces are configured in your *msticpyconfig.yaml* use +the ``list_workspaces()`` function. + +.. tip:: ``list_workspaces`` is a class function, so you do not need to + instantiate a WorkspaceConfig to call this function. + +.. code:: python3 + + WorkspaceConfig.list_workspaces() + +.. parsed-literal:: + + {'Default': {'WorkspaceId': '271f17d3-5457-4237-9131-ae98a6f55c37', + 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, + 'Workspace1': {'WorkspaceId': 'c88dd3c2-d657-4eb3-b913-58d58d811a41', + 'TenantId': '335b56ab-67a2-4118-ac14-6eb454f350af'}, + 'TestWorkspace': {'WorkspaceId': '17e64332-19c9-472e-afd7-3629f299300c', + 'TenantId': '4ea41beb-4546-4fba-890b-55553ce6003a'}} -For more details on Azure authentication see :doc:`../getting_started/AzureAuthentication`. Other MS Sentinel Documentation ------------------------------- -For examples of using the MS Defender provider, see the sample -`M365 Defender Notebook` - Built-in :ref:`data_acquisition/DataQueries:Queries for Microsoft Sentinel`. 
-:py:mod:`Sentinel KQL driver API documentation` +See also: :py:mod:`Sentinel KQL driver API documentation ` diff --git a/docs/source/extending/WritingDataProviders.rst b/docs/source/extending/WritingDataProviders.rst index dfe6ba132..15690ab16 100644 --- a/docs/source/extending/WritingDataProviders.rst +++ b/docs/source/extending/WritingDataProviders.rst @@ -105,7 +105,7 @@ This takes the following parameters: - ``query`` - string of query text - ``query_source`` - this is populated if the query is a MSTICPy template query read from a query yaml file (see - :doc:`Creating new queries <./Queries>`) + :doc:`Creating new queries <../extending/Queries>`) and is an instance of :py:class:`QuerySource `. This is a representation of the yaml query with extracted parameters and metadata available as explicit @@ -331,7 +331,7 @@ In the ``__init__.py`` module of data drivers Create a folder in msticpy/data/queries with the name of your *DataEnvironment* and add queries. The folder name must match the item that you added to the DataEnvironment Enum class in step 3 above. The For more details on creating queries, see -:doc:`./Queries`. +:doc:`Creating new queries <../extending/Queries>`. 
Query parameter names ~~~~~~~~~~~~~~~~~~~~~ diff --git a/msticpy/_version.py b/msticpy/_version.py index 9fe71cdf3..2333c41fe 100644 --- a/msticpy/_version.py +++ b/msticpy/_version.py @@ -1,2 +1,2 @@ """Version file.""" -VERSION = "2.6.0" +VERSION = "2.7.0.pre1" diff --git a/msticpy/data/core/query_defns.py b/msticpy/data/core/query_defns.py index 87e16c949..9c6bc3e80 100644 --- a/msticpy/data/core/query_defns.py +++ b/msticpy/data/core/query_defns.py @@ -88,11 +88,12 @@ class DataEnvironment(Enum): Unknown = 0 MSSentinel = 1 - AzureSentinel = 1 # alias of LogAnalytics - LogAnalytics = 1 - Kusto = 2 + AzureSentinel = 1 # alias of MSSentinel + LogAnalytics = 1 # alias of MSSentinel + MSSentinel_New = 1 # alias of MSSentinel + Kusto = 2 # alias of Kusto AzureDataExplorer = 2 # alias of Kusto - AzureSecurityCenter = 3 + Kusto_New = 2 # alias of Kusto MSGraph = 4 SecurityGraph = 4 MDE = 5 @@ -108,8 +109,10 @@ class DataEnvironment(Enum): Elastic = 14 OSQueryLogs = 15 OSQuery = 15 - MSSentinel_New = 16 - Kusto_New = 17 + MSSentinel_Legacy = 16 + MSSentinel_KQLM = 16 + Kusto_Legacy = 17 + Kusto_KQLM = 17 VelociraptorLogs = 18 Velociraptor = 18 diff --git a/msticpy/data/drivers/__init__.py b/msticpy/data/drivers/__init__.py index 6e934677f..b31cf637a 100644 --- a/msticpy/data/drivers/__init__.py +++ b/msticpy/data/drivers/__init__.py @@ -17,10 +17,9 @@ __version__ = VERSION _ENVIRONMENT_DRIVERS = { - DataEnvironment.LogAnalytics: ("kql_driver", "KqlDriver"), - DataEnvironment.AzureSecurityCenter: ("kql_driver", "KqlDriver"), + DataEnvironment.LogAnalytics: ("azure_monitor_driver", "AzureMonitorDriver"), DataEnvironment.SecurityGraph: ("security_graph_driver", "SecurityGraphDriver"), - DataEnvironment.Kusto: ("kusto_driver", "KustoDriver"), + DataEnvironment.Kusto: ("azure_kusto_driver", "AzureKustoDriver"), DataEnvironment.MDATP: ("mdatp_driver", "MDATPDriver"), DataEnvironment.MDE: ("mdatp_driver", "MDATPDriver"), DataEnvironment.LocalData: ("local_data_driver", 
"LocalDataDriver"), @@ -38,6 +37,8 @@ "local_velociraptor_driver", "VelociraptorLogDriver", ), + DataEnvironment.MSSentinel_Legacy: ("kql_driver", "KqlDriver"), + DataEnvironment.Kusto_Legacy: ("kusto_driver", "KustoDriver"), } CUSTOM_PROVIDERS: Dict[str, type] = {} diff --git a/msticpy/data/drivers/azure_kusto_driver.py b/msticpy/data/drivers/azure_kusto_driver.py index 160cb14a5..5f8447cc5 100644 --- a/msticpy/data/drivers/azure_kusto_driver.py +++ b/msticpy/data/drivers/azure_kusto_driver.py @@ -150,7 +150,7 @@ def __init__(self, connection_str: Optional[str] = None, **kwargs): Maximum is 3600 seconds (1 hour). (can be set here or in connect and overridden in query methods) proxies : Dict[str, str] - Proxy settings for log analytics queries. + Proxy settings for Kusto queries. Dictionary format is {protocol: proxy_url} Where protocol is https, http, etc. and proxy_url can contain optional authentication information in the format diff --git a/msticpy/data/drivers/kql_driver.py b/msticpy/data/drivers/kql_driver.py index 9362dd50f..4a83fba78 100644 --- a/msticpy/data/drivers/kql_driver.py +++ b/msticpy/data/drivers/kql_driver.py @@ -11,8 +11,7 @@ import os import re import warnings -from datetime import datetime -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import pandas as pd from azure.core.exceptions import ClientAuthenticationError @@ -118,10 +117,6 @@ def __init__(self, connection_str: str = None, **kwargs): self._debug = kwargs.get("debug", False) super().__init__(**kwargs) self.workspace_id: Optional[str] = None - self.set_driver_property( - DriverProps.FORMATTERS, - {"datetime": self._format_datetime, "list": self._format_list}, - ) self._loaded = self._is_kqlmagic_loaded() os.environ["KQLMAGIC_LOAD_MODE"] = "silent" @@ -132,6 +127,9 @@ def __init__(self, connection_str: str = None, **kwargs): self._set_kql_env_option("enable_add_items_to_help", False) self._schema: 
Dict[str, Any] = {} self.environment = kwargs.pop("data_environment", DataEnvironment.MSSentinel) + self.set_driver_property( + DriverProps.EFFECTIVE_ENV, DataEnvironment.MSSentinel.name + ) self.kql_cloud, self.az_cloud = self._set_kql_cloud() for option, value in kwargs.items(): self._set_kql_option(option, value) @@ -303,7 +301,7 @@ def query_with_results( Returns ------- Tuple[pd.DataFrame, results.ResultSet] - A DataFrame (if successfull) and + A DataFrame (if successful) and Kql ResultSet. """ @@ -456,22 +454,6 @@ def _set_kql_cloud(self): self._set_kql_option("cloud", kql_cloud) return kql_cloud, az_cloud - @staticmethod - def _format_datetime(date_time: datetime) -> str: - """Return datetime-formatted string.""" - return date_time.isoformat(sep="T") + "Z" - - @staticmethod - def _format_list(param_list: Iterable[Any]): - """Return formatted list parameter.""" - fmt_list = [] - for item in param_list: - if isinstance(item, str): - fmt_list.append(f"'{item}'") - else: - fmt_list.append(f"{item}") - return ", ".join(fmt_list) - @staticmethod def _raise_query_failure(query, result): """Raise query failure exception.""" diff --git a/msticpy/data/drivers/kusto_driver.py b/msticpy/data/drivers/kusto_driver.py index 8a5cb4dec..be3dca3bd 100644 --- a/msticpy/data/drivers/kusto_driver.py +++ b/msticpy/data/drivers/kusto_driver.py @@ -13,7 +13,7 @@ from ...common.provider_settings import ProviderArgs, get_provider_settings from ...common.utility import export from ..core.query_defns import DataEnvironment -from .kql_driver import KqlDriver, QuerySource +from .kql_driver import DriverProps, KqlDriver, QuerySource __version__ = VERSION __author__ = "Ian Hellen" @@ -46,6 +46,7 @@ def __init__(self, connection_str: str = None, **kwargs): """ super().__init__(connection_str=connection_str, **kwargs) self.environment = kwargs.get("data_environment", DataEnvironment.Kusto) + self.set_driver_property(DriverProps.EFFECTIVE_ENV, DataEnvironment.Kusto.name) self._connected = 
True self._kusto_settings: KustoClusterSettings = _get_kusto_settings() self._cluster_uri = None diff --git a/requirements-all.txt b/requirements-all.txt index e36213394..817127ac6 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -3,7 +3,7 @@ azure-common>=1.1.18 azure-core>=1.24.0 azure-identity>=1.10.0 azure-keyvault-secrets>=4.0.0 -azure-kusto-data>=4.0.0 +azure-kusto-data>=4.0.0, <=5.0.0 azure-mgmt-compute>=4.6.2 azure-mgmt-core>=1.2.1 azure-mgmt-keyvault>=2.0.0 @@ -12,7 +12,7 @@ azure-mgmt-network>=2.7.0 azure-mgmt-resource>=16.1.0 azure-mgmt-resourcegraph>=8.0.0 azure-mgmt-subscription>=3.0.0 -azure-monitor-query>=1.0.0 +azure-monitor-query>=1.0.0, <=2.0.0 azure-storage-blob>=12.5.0 beautifulsoup4>=4.0.0 bokeh>=1.4.0, <4.0.0 @@ -21,13 +21,12 @@ deprecated>=1.2.4 dnspython>=2.0.0, <3.0.0 folium>=0.9.0 geoip2>=2.9.0 -httpx==0.24.1 +httpx>=0.23.0, <1.0.0 html5lib ipython >= 7.1.1; python_version < "3.8" ipython >= 7.23.1; python_version >= "3.8" ipywidgets>=7.4.2, <9.0.0 keyring>=13.2.1 -KqlmagicCustom[jupyter-basic,auth_code_clipboard]>=0.1.114.post22 KqlmagicCustom[jupyter-extended]>=0.1.114.post22 lxml>=4.6.5 matplotlib>=3.0.0 diff --git a/requirements.txt b/requirements.txt index bc0f24c26..d429d7be1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,11 @@ attrs>=18.2.0 azure-common>=1.1.18 azure-core>=1.24.0 azure-identity>=1.10.0 +azure-keyvault-secrets>=4.0.0 +azure-kusto-data>=4.0.0, <=5.0.0 +azure-mgmt-keyvault>=2.0.0 azure-mgmt-subscription>=3.0.0 +azure-monitor-query>=1.0.0, <=2.0.0 beautifulsoup4>=4.0.0 bokeh>=1.4.0, <4.0.0 cryptography>=3.1 @@ -10,12 +14,12 @@ deprecated>=1.2.4 dnspython>=2.0.0, <3.0.0 folium>=0.9.0 geoip2>=2.9.0 -httpx==0.24.1 +httpx>=0.23.0, <1.0.0 html5lib ipython >= 7.1.1; python_version < "3.8" ipython >= 7.23.1; python_version >= "3.8" ipywidgets>=7.4.2, <9.0.0 -KqlmagicCustom[jupyter-basic,auth_code_clipboard]>=0.1.114.post22 +keyring>=13.2.1 lxml>=4.6.5 msal>=1.12.0 msal_extensions>=0.3.0 diff 
--git a/setup.py b/setup.py index aa6fc3ed8..1bafb4bc9 100644 --- a/setup.py +++ b/setup.py @@ -4,13 +4,14 @@ # license information. # -------------------------------------------------------------------------- """Setup script for msticpy.""" + import re import setuptools with open("msticpy/_version.py", "r", encoding="utf-8") as fd: v_match = re.search(r'^VERSION\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE) - __version__ = v_match.group(1) if v_match else "no version" + __version__ = v_match[1] if v_match else "no version" with open("requirements.txt", "r", encoding="utf-8") as fh: INSTALL_REQUIRES = fh.readlines() @@ -41,15 +42,8 @@ def _combine_extras(extras: list) -> list: "azure-storage-blob>=12.5.0", "azure-mgmt-resourcegraph>=8.0.0", ], - "azure_query": [ - "azure-kusto-data>=4.0.0", - "azure-monitor-query>=1.0.0", - ], - "keyvault": [ - "azure-keyvault-secrets>=4.0.0", - "azure-mgmt-keyvault>=2.0.0", - "keyring>=13.2.1", # needed by Key Vault package - ], + "azure_query": [], + "keyvault": [], "ml": [ "scikit-learn>=1.0.0", "scipy>=1.1.0", diff --git a/tests/data/drivers/test_kql_driver.py b/tests/data/drivers/test_kql_driver.py index 4cbeb55a5..92abed2c6 100644 --- a/tests/data/drivers/test_kql_driver.py +++ b/tests/data/drivers/test_kql_driver.py @@ -28,7 +28,7 @@ # from Kqlmagic import kql as kql_exec -KqlDriver = import_driver(DataEnvironment.AzureSentinel) +KqlDriver = import_driver(DataEnvironment.MSSentinel_Legacy) # from msticpy.data.drivers.kql_driver import KqlDriver GET_IPYTHON_PATCH = KqlDriver.__module__ + ".get_ipython" @@ -139,7 +139,7 @@ class ModernCred: class Token: """Mocked token class.""" - token = "Token" + token = "Token" # nosec @classmethod def get_token(cls, *args, **kwargs): diff --git a/tests/data/drivers/test_kusto_driver.py b/tests/data/drivers/test_kusto_driver.py index 446d9b618..76914fbc7 100644 --- a/tests/data/drivers/test_kusto_driver.py +++ b/tests/data/drivers/test_kusto_driver.py @@ -42,10 +42,10 @@ 
@pytest.fixture def kusto_qry_prov(): """Return query provider with query paths.""" - qry_path = str(get_test_data_path().joinpath("kusto")) + qry_path = str(get_test_data_path().joinpath("kusto_legacy")) msticpy_config = get_test_data_path().joinpath("msticpyconfig.yaml") with custom_mp_config(msticpy_config): - return QueryProvider("Kusto", query_paths=[qry_path]) + return QueryProvider("Kusto_Legacy", query_paths=[qry_path]) _TEST_CON_STR = [ @@ -182,10 +182,10 @@ def test_kusto_driver_integ_auth(inst, qry_args, monkeypatch, kusto_qry_prov): @pytest.mark.parametrize("inst, qry_args", _KUSTO_TESTS) def test_kusto_driver_params_fail(inst, qry_args, monkeypatch): """Test with parameters but missing config.""" - qry_path = str(get_test_data_path().joinpath("kusto")) + qry_path = str(get_test_data_path().joinpath("kusto_legacy")) msticpy_config = get_test_data_path().joinpath("msticpyconfig-nokusto.yaml") with custom_mp_config(msticpy_config): - qry_prov = QueryProvider("Kusto", query_paths=[qry_path]) + qry_prov = QueryProvider("Kusto_Legacy", query_paths=[qry_path]) driver = qry_prov._query_provider print(inst) diff --git a/tests/data/test_query_source.py b/tests/data/test_query_source.py index 56fb654d8..69134aa42 100644 --- a/tests/data/test_query_source.py +++ b/tests/data/test_query_source.py @@ -16,7 +16,6 @@ from msticpy.data.core.data_providers import DriverBase, QueryProvider from msticpy.data.core.query_source import QuerySource -from msticpy.data.drivers import kql_driver _SPLUNK_IMP_OK = False try: @@ -185,46 +184,6 @@ def test_list_formatter(self): check_list = ", ".join([str(i) for i in int_list]) self.assertIn(check_list, query) - def test_cust_formatters_kql(self): - """Test KqlDriver formatting.""" - kql_fmt = { - "datetime": kql_driver.KqlDriver._format_datetime, - "list": kql_driver.KqlDriver._format_list, - } - - test_end = datetime.utcnow() - test_start = test_end - timedelta(days=1) - ip_address_list = "192.168.0.1, 192.168.0.2, 192.168.0.3" - 
- check_dt_str = test_start.isoformat(sep="T") + "Z" - q_src = self.query_sources["Azure"]["list_azure_activity_for_ip"] - query = q_src.create_query( - formatters=kql_fmt, - start=test_start, - end=test_end, - ip_address_list=ip_address_list, - ) - self.assertIn(check_dt_str, query) - - query = q_src.create_query( - formatters=kql_fmt, - ip_address_list=ip_address_list, - start=test_start, - end=test_end, - ) - check_list = ", ".join([f"'{ip.strip()}'" for ip in ip_address_list.split(",")]) - self.assertIn(check_list, query) - - int_list = [1, 2, 3, 4] - query = q_src.create_query( - formatters=kql_fmt, - ip_address_list=int_list, - start=test_start, - end=test_end, - ) - check_list = ", ".join([str(i) for i in int_list]) - self.assertIn(check_list, query) - @pytest.mark.skipif(not _SPLUNK_IMP_OK, reason="Partial msticpy install") def test_cust_formatters_splunk(): diff --git a/tests/testdata/kusto_legacy/kusto_queries_mstic_app.yaml b/tests/testdata/kusto_legacy/kusto_queries_mstic_app.yaml new file mode 100644 index 000000000..2e1701838 --- /dev/null +++ b/tests/testdata/kusto_legacy/kusto_queries_mstic_app.yaml @@ -0,0 +1,131 @@ +metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [AppAuthCluster.scrubbeddata] + cluster: https://msticapp.kusto.windows.net + tags: ["user"] +defaults: + parameters: + table: + description: Table name + type: str + default: "DeviceProcessEvents" + start: + description: Query start time + type: datetime + default: -30 + end: + description: Query end time + type: datetime + default: 0 + add_query_items: + description: Additional query clauses + type: str + default: "" +sources: + list_host_processes: + description: Lists all process creations for a host + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where DeviceName has "{host_name}" + {add_query_items}' + uri: None + parameters: + host_name: + description: Name 
of host + type: str + process_creations: + description: Lists all processes created by name or hash + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where FileName contains "{process_identifier}" or SHA1 has "{process_identifier}" or SHA256 has "{process_identifier}" or MD5 has "{process_identifier}" + {add_query_items}' + parameters: + process_identifier: + description: Identifier for the process, filename, or hash + type: str + process_paths: + description: Lists all processes created from a path + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where FileName contains "{file_path}" + {add_query_items}' + parameters: + file_path: + description: full or partial path + type: str + process_cmd_line: + description: Lists all processes with a command line containing a string + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str + query_new_alias: + description: Lists all processes with a command line containing a string + metadata: + data_families: [AppAuthClustera.scrubbeddata] + cluster: https://msticapp.kusto.windows.net + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str + bad_query_fam_no_dot: + description: Lists all processes with a command line containing a string + metadata: + data_families: [scrubbeddata] + cluster: https://msticapp.kusto.windows.net + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where 
ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str + bad_query_no_cluster: + description: Lists all processes with a command line containing a string + metadata: + data_families: [AppAuthCluster.scrubbeddata] + cluster: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str diff --git a/tests/testdata/kusto_legacy/kusto_queries_mstic_ia.yaml b/tests/testdata/kusto_legacy/kusto_queries_mstic_ia.yaml new file mode 100644 index 000000000..8b2dae600 --- /dev/null +++ b/tests/testdata/kusto_legacy/kusto_queries_mstic_ia.yaml @@ -0,0 +1,131 @@ +metadata: + version: 1 + description: Kusto Queries + data_environments: [Kusto] + data_families: [IntegAuthCluster.scrubbeddata] + cluster: https://mstic.kusto.windows.net + tags: ["user"] +defaults: + parameters: + table: + description: Table name + type: str + default: "DeviceProcessEvents" + start: + description: Query start time + type: datetime + default: -30 + end: + description: Query end time + type: datetime + default: 0 + add_query_items: + description: Additional query clauses + type: str + default: "" +sources: + list_host_processes: + description: Lists all process creations for a host + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where DeviceName has "{host_name}" + {add_query_items}' + uri: None + parameters: + host_name: + description: Name of host + type: str + process_creations: + description: Lists all processes created by name or hash + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where FileName contains "{process_identifier}" or SHA1 has 
"{process_identifier}" or SHA256 has "{process_identifier}" or MD5 has "{process_identifier}" + {add_query_items}' + parameters: + process_identifier: + description: Identifier for the process, filename, or hash + type: str + process_paths: + description: Lists all processes created from a path + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where FileName contains "{file_path}" + {add_query_items}' + parameters: + file_path: + description: full or partial path + type: str + process_cmd_line: + description: Lists all processes with a command line containing a string + metadata: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str + query_new_alias: + description: Lists all processes with a command line containing a string + metadata: + data_families: [IntegAuthCluster2.scrubbeddata] + cluster: https://mstic.kusto.windows.net + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str + bad_query_fam_no_dot: + description: Lists all processes with a command line containing a string + metadata: + data_families: [scrubbeddata] + cluster: https://mstic.kusto.windows.net + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str + bad_query_no_cluster: + description: Lists all processes with a command line containing a string + metadata: + data_families: 
[IntegAuthCluster.scrubbeddata] + cluster: + args: + query: ' + {table} + | where Timestamp >= datetime({start}) + | where Timestamp <= datetime({end}) + | where ProcessCommandLine contains "{cmd_line}" + {add_query_items}' + parameters: + cmd_line: + description: Command line artifact to search for + type: str