V1.8.5 (#283)
* [DC] GitHub Webhook via S3 (#270)

* [DC] Okta Connector Enhancement (#282)

* [DC] Azure Subscription Connector supports Gov cloud (#281)

* [Handlers] SES, SNS, Twilio, Stored Procedure, SMTP (#284)

* [DC] Crowdstrike devices via API (#279)

* [DC] Add Crowdstrike API Connector

* [DC] Crowdstrike Static Analysis (#287)

* [DC] Cisco Umbrella Connector (#280)
sfc-gh-gbutzi authored Sep 4, 2019
1 parent 60edab7 commit ff8176e
Showing 23 changed files with 1,414 additions and 185 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -14,3 +14,4 @@ src/**/lib/
venv
*.db
*.egg-info
.vscode
32 changes: 32 additions & 0 deletions docs/connectors/github_webhooks_s3.md
@@ -0,0 +1,32 @@
## Getting GitHub Webhooks into S3

GitHub allows Organization or Repository admins to set up webhooks that deliver event payloads to an endpoint. [Webhooks](https://developer.github.com/v3/activity/events/types/) can be
triggered when

- a new commit is pushed,
- a branch is deleted,
- a deploy key is added to or removed from a repository,
- a new download is created in GitHub,
- a GitHub package vulnerability alert is created, or
- [many other events](https://developer.github.com/v3/activity/events/types/) occur.

SnowAlert does not yet natively receive webhooks or run any other internet-facing infrastructure, so a separate
component is needed to collect GitHub webhooks into S3 before this connector can load them into Snowflake. One way to
accomplish this is the [archiver below](https://github.com/chanzuckerberg/cztack/tree/master/github-webhooks-to-s3#github-webhooks-to-s3), which sends webhooks to an AWS API Gateway that invokes an AWS Lambda.
After the Lambda processes each request, the payload is sent to AWS Firehose, which formats it and writes it to S3.

```hcl
module "archiver" {
source = "github.com/chanzuckerberg/cztack//aws-acm-certgithub-webhooks-to-s3?ref=v0.19.0"
env = "${var.env}"
project = "${var.project}"
owner = "${var.owner}"
service = "${var.component}"
fqdn = "..."
certificate_arn = "..."
route53_zone_id = "..."
}
```
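
Once the archiver is deployed, it can be useful to confirm that webhook payloads are actually landing in the bucket before configuring the connector. The boto3 sketch below is illustrative only: the bucket name, prefix, and the assumption that Firehose emits newline-delimited JSON records are placeholders, not part of this connector.

```python
import json

import boto3  # assumes AWS credentials are available via the usual environment/profile

BUCKET = 'my-github-webhooks-bucket'  # placeholder: bucket the Firehose delivery stream writes to
PREFIX = 'github-webhooks/'           # placeholder: delivery prefix configured on the stream

s3 = boto3.client('s3')

# Look at a handful of recently delivered objects under the prefix
listing = s3.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX, MaxKeys=10)
for entry in listing.get('Contents', []):
    body = s3.get_object(Bucket=BUCKET, Key=entry['Key'])['Body'].read().decode('utf-8')
    # Assuming the stream was configured to write newline-delimited JSON records
    for line in filter(None, map(str.strip, body.splitlines())):
        event = json.loads(line)
        print(entry['Key'], event.get('repository', {}).get('full_name'))
```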

54 changes: 54 additions & 0 deletions migrations/v1_8_4-v1_8_5.md
@@ -0,0 +1,54 @@
## Okta Connector

### Enhancing Okta Connector

In the v1.8.5 Okta DC, we ingest not only log data from Okta, but also User and Group information. This new information is collected into separate tables, which are created by the Okta DC at setup time.

If you'd like to manually create these tables for an existing connection, you can do so with SQL like the following. Note that you may need to replace 'default' with the custom identifier for your Okta connection.

~~~
USE ROLE snowalert;
SHOW TABLES LIKE 'okta_default_connection' IN DATA; -- This gets you the comment for the table, which you will need later in this script.
CREATE TABLE data.okta_default_users_connection (v VARIANT, event_time TIMESTAMP_LTZ);
ALTER TABLE data.okta_default_users_connection SET COMMENT='
<the comment from your data.okta_default_connection table>
';
CREATE TABLE data.okta_default_groups_connection (v VARIANT, event_time TIMESTAMP_LTZ);
ALTER TABLE data.okta_default_groups_connection SET COMMENT='
<the comment from your data.okta_default_connection table>
';
~~~
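
For deployments with more than one Okta connection, the same migration can be scripted. The Python sketch below (using `snowflake-connector-python`) simply mirrors the SQL above; the account, credentials, and the `default` suffix are placeholders you would replace.

~~~
import snowflake.connector
from snowflake.connector import DictCursor

# Placeholders: use your own account identifier and authentication method
ctx = snowflake.connector.connect(account='my_account', user='my_user', password='...')
cur = ctx.cursor(DictCursor)

cur.execute("USE ROLE snowalert")

suffix = 'default'  # replace with the custom identifier of your Okta connection
cur.execute(f"SHOW TABLES LIKE 'okta_{suffix}_connection' IN data")
comment = cur.fetchone()['comment']  # the comment holding the connection options
escaped_comment = comment.replace("'", "''")  # escape single quotes for use in a SQL literal

for kind in ('users', 'groups'):
    table = f"data.okta_{suffix}_{kind}_connection"
    cur.execute(f"CREATE TABLE IF NOT EXISTS {table} (v VARIANT, event_time TIMESTAMP_LTZ)")
    cur.execute(f"ALTER TABLE {table} SET COMMENT = '{escaped_comment}'")

cur.close()
ctx.close()
~~~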

## Azure Subscription Connector

### Supporting Gov Cloud

In the v1.8.5 Azure Subscription DC, we support both the commercial and government clouds. This support is enabled by an extra connection option (`cloud_type: reg` for commercial, `cloud_type: gov` for government) that must be added to your connection table's comment, like so:

~~~
USE ROLE snowalert;
SHOW TABLES LIKE 'azure_subscription%' IN DATA; -- This gets you the comment for the table.
ALTER TABLE data.azure_subscription_default_connection SET COMMENT='
---
module: azure_subscription
cloud_type: reg
client_id: <CLIENT_ID>
tenant_id: <TENANT_ID>
client_secret: <CLIENT_SECRET>
';
ALTER TABLE data.azure_subscription_default_gov_connection SET COMMENT='
---
module: azure_subscription
cloud_type: gov
client_id: <CLIENT_ID>
tenant_id: <TENANT_ID>
client_secret: <CLIENT_SECRET>
';
~~~
43 changes: 30 additions & 13 deletions src/baseline_modules/temporal_by_day/temporal_by_day.R
@@ -8,10 +8,10 @@ require(purrr)
#ID

get_percentiles <- function(dataframe, column_name, exit_name, column_names_summarize_by ){
p <- c(.1, .25,.75,.95,.5)
p_names <- map_chr(p, ~paste0(.x*100, exit_name))
p <- c(.025,.05,.1, .25,.75,.95,.975,.5)
p_names <- map_chr(p, ~paste0(ifelse(floor(.x*100)<.x*100, gsub(".*\\.","",toString(.x)),.x*100), exit_name))
p_funs <- map(p, ~partial(quantile, probs=.x, na.rm=TRUE)) %>%
set_names(nm=p_names)
set_names(nm=p_names)
avg_funs <- map(1, ~partial(mean, probs=.x, na.rm=TRUE)) %>% set_names(nm=paste0('avg_', exit_name))
number_of <- map(1, ~partial(sum, probs=.x, na.rm=TRUE)) %>% set_names(nm=paste0('_', exit_name))
max_funs <- map(1, ~partial(max, probs=.x, na.rm=TRUE)) %>% set_names(nm=paste0('max_', exit_name))
@@ -21,8 +21,8 @@ get_percentiles <- function(dataframe, column_name, exit_name, column_names_summ
return(dataframe %>% group_by_(column_name) %>% summarize_at(vars(column_names_summarize_by), full_funs))
}


results <- input_table
results$PIVOT = results$LANDING_TABLE
print(colnames(results))
if('EVENT_TIME' %in% colnames(results)) {
print('Event time case triggered')
@@ -42,6 +42,7 @@ if(!('ID' %in% colnames(results))){

if('NUM_EVENTS' %in% colnames(results)){
print('Num events found')
results$NUM_EVENTS <- as.integer(results$NUM_EVENTS)
by_day_when_present <- results %>%
group_by(PIVOT, DAY)%>%
summarise(num_events=sum(NUM_EVENTS), num_ids=sum(ID))
@@ -53,6 +54,7 @@ by_day_when_present <- results %>%
num_ids=length(unique(ID))
)
}

print(by_day_when_present)
earliest_time <- min(results$DAY, na.rm=TRUE)
latest_time <- max(results$DAY, na.rm=TRUE)
@@ -66,19 +68,34 @@ expand_days_date <- expand_days %>%group_by(PIVOT)%>%summarise(earliest=min(DAY)
numerics <- merge(when_present_numeric, expand_days_numeric, by='PIVOT', all.x=TRUE, all.y=TRUE)
dates <- merge(when_present_date, expand_days_date, by='PIVOT', all.x=TRUE, all.y=TRUE)
full <- cbind(numerics, dates)
return_value <- full[c('PIVOT', 'num_ids_10when_present',
'num_events_10when_present', 'num_ids_25when_present',
'num_events_25when_present', 'num_ids_75when_present',
'num_events_75when_present','num_ids_95when_present',
'num_events_95when_present','num_ids_50when_present',
'num_events_50when_present', 'num_ids_avg_when_present',
'num_events_avg_when_present',
full$earliest_when_present <- as.character(full$earliest_when_present)
full$latest_when_present <- as.character(full$latest_when_present)
return_value <- full[c('PIVOT',
'num_ids_025when_present','num_events_025when_present',
'num_ids_5when_present','num_events_5when_present',
'num_ids_10when_present','num_events_10when_present',
'num_ids_25when_present', 'num_events_25when_present',
'num_ids_75when_present', 'num_events_75when_present',
'num_ids_95when_present', 'num_events_95when_present',
'num_ids_975when_present', 'num_events_975when_present',
'num_ids_50when_present', 'num_events_50when_present',
'num_ids_avg_when_present', 'num_events_avg_when_present',
'num_ids__when_present', 'num_events__when_present',
'num_ids_max_when_present', 'num_events_max_when_present',
'num_ids_min_when_present', 'num_events_min_when_present',
'num_ids_025overall','num_events_025overall',
'num_ids_5overall','num_events_5overall',
'num_ids_10overall','num_events_10overall',
'num_ids_25overall', 'num_events_25overall',
'num_ids_75overall', 'num_events_75overall',
'num_ids_95overall', 'num_events_95overall',
'num_ids_975overall', 'num_events_975overall',
'num_ids__overall', 'num_events__overall',
'num_ids_max_overall', 'num_events_max_overall',
'num_ids_min_overall', 'num_events_min_overall',
'num_ids_50overall', 'num_events_50overall',
'num_ids_avg_overall', 'num_events_avg_overall',
'earliest_when_present', 'latest_when_present',
'num_days', 'num_days_overall'
)
]
)
]
9 changes: 9 additions & 0 deletions src/connectors/__init__.py
@@ -6,9 +6,12 @@
from . import azure_log
from . import azure_subscription
from . import azure_vm
from . import github_webhooks_s3
from . import gsuite_logs
from . import okta
from . import tenable_settings
from . import crowdstrike_devices
from . import cisco_umbrella

__all__ = [
'aws_inventory',
@@ -19,9 +22,12 @@
'azure_log',
'azure_subscription',
'azure_vm',
'github_webhooks_s3',
'gsuite_logs',
'okta',
'tenable_settings',
'crowdstrike_devices',
'cisco_umbrella',
]

connectors = {
@@ -33,9 +39,12 @@
'azure_log': azure_log,
'azure_subscription': azure_subscription,
'azure_vm': azure_vm,
'github_webhooks_s3': github_webhooks_s3,
'gsuite_logs': gsuite_logs,
'okta': okta,
'tenable_settings': tenable_settings,
'crowdstrike_devices': crowdstrike_devices,
'cisco_umbrella': cisco_umbrella
}

CONNECTION_OPTIONS = [
49 changes: 36 additions & 13 deletions src/connectors/azure_subscription.py
@@ -6,6 +6,7 @@

from runners.helpers import db
from runners.helpers.dbconfig import ROLE as SA_ROLE
from .utils import yaml_dump

from azure.mgmt.subscription.subscription_client import SubscriptionClient
from azure.common.client_factory import get_client_from_json_dict
@@ -36,6 +37,18 @@
'secret': 'true',
'required': True
},
{
'type': 'str',
'name': 'cloud_type',
'options': [
{'value': 'reg', 'label': "Azure Cloud"},
{'value': 'gov', 'label': "Azure Gov Cloud"}
],
'title': "Cloud Type",
'placeholder': "Choose Cloud Type",
'prompt': "Azure provides two types of clouds: regular and government",
'required': True
},
]

LANDING_TABLE_COLUMNS = [
@@ -53,17 +66,11 @@

def connect(connection_name, options):
base_name = f"azure_subscription_{connection_name}"
tenant_id = options['tenant_id']
client_id = options['client_id']
client_secret = options['client_secret']

comment = f'''
---
module: azure_subscription
client_id: {client_id}
tenant_id: {tenant_id}
client_secret: {client_secret}
'''
comment = yaml_dump(
module='azure_subscription',
**options
)

db.create_table(
name=f'data.{base_name}_connection',
@@ -83,14 +90,30 @@ def ingest(table_name, options):
tenant_id = options['tenant_id']
client_id = options['client_id']
client_secret = options['client_secret']
cloud_type = options['cloud_type']

activeDirectoryEndpoints = {
'reg': "https://login.microsoftonline.com",
'gov': "https://login.microsoftonline.us"
}

resourceManagerEndpoints = {
'reg': "https://management.azure.com/",
'gov': "https://management.usgovcloudapi.net"
}

managementEndpoints = {
'reg': "https://management.core.windows.net/",
'gov': "https://management.core.usgovcloudapi.net"
}

subscriptions_service = get_client_from_json_dict(SubscriptionClient, {
"tenantId": tenant_id,
"clientId": client_id,
"clientSecret": client_secret,
"activeDirectoryEndpointUrl": "https://login.microsoftonline.com",
"resourceManagerEndpointUrl": "https://management.azure.com/",
"managementEndpointUrl": "https://management.core.windows.net/",
"activeDirectoryEndpointUrl": activeDirectoryEndpoints[cloud_type],
"resourceManagerEndpointUrl": resourceManagerEndpoints[cloud_type],
"managementEndpointUrl": managementEndpoints[cloud_type],
}).subscriptions

subscription_list = subscriptions_service.list()