ECI-393 Add Networking and Policy Sub-Modules #20

Status: Open
wants to merge 26 commits into base: master

Commits (26)
c4c6f69
Init Function and Networking module
sva91 Nov 26, 2024
0df16b8
Add VCN Module with create vcn option
sva91 Dec 4, 2024
4442a73
add policy module
sva91 Dec 4, 2024
758816e
ECI-394 Add docker login and function app modules (#21)
sva91 Dec 26, 2024
cfcd696
include type and oracle attributes when forwarding logs
rheei Jan 13, 2025
9f10305
specify the oci service as the dd source for directing logs to their …
rheei Jan 13, 2025
184cac0
add unit test
rheei Jan 13, 2025
2bff425
handle potential OOB error
rheei Jan 13, 2025
1558106
check for None
rheei Jan 15, 2025
a6a999a
Merge pull request #29 from DataDog/rheei/add-more-log-attributes
rheei Jan 15, 2025
178ccf0
ECI-395 Search Resources for Logging (#24)
sva91 Jan 15, 2025
5c1cb3b
add logic for redacdting fields
rheei Jan 28, 2025
64b65ea
Revert "add logic for redacdting fields"
rheei Jan 28, 2025
cc56e1c
redact sensitive log fields
rheei Jan 31, 2025
408d97b
Merge pull request #33 from DataDog/rheei/eci-453-add-redaction-option
rheei Feb 3, 2025
e1360d3
[ECI-396] Add logging module (#31)
sva91 Feb 5, 2025
3fa502d
add default for http endpoint
sva91 Feb 5, 2025
87036c4
Assign compartment variable to Logging section
sva91 Feb 5, 2025
0aa8288
conditional data log group
sva91 Feb 6, 2025
ab4d4de
remove prevent destroy
sva91 Feb 6, 2025
a64eec9
remove conditional creation
sva91 Feb 6, 2025
3e9df07
add endpoints for other sites
sva91 Feb 6, 2025
80060dc
update variable validation
sva91 Feb 7, 2025
e303fe7
execute policy resources only in home region
sva91 Feb 7, 2025
d3f449f
add function shape variable
sva91 Feb 7, 2025
8bcfa58
add support for tenancy compartment
sva91 Feb 7, 2025
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,3 +1,9 @@
*.pyc
**/__pycache__/**
.idea/
**/.terraform/**
Contributor

Might be good to have this part merged now since it'll benefit anyone running terraform locally

**/.terraform.lock.hcl
**/.DS_Store
**/terraform.tfstate
**/terraform.tfstate.backup
**/terraform.tfvars
10 changes: 10 additions & 0 deletions datadog-logs-oci-orm/data.tf
@@ -0,0 +1,10 @@
data "external" "logging_services" {
program = ["bash", "logging_services.sh"]
}

data "oci_identity_tenancy" "tenancy_metadata" {
tenancy_id = var.tenancy_ocid
}

data "oci_identity_regions" "all_regions" {
}
128 changes: 128 additions & 0 deletions datadog-logs-oci-orm/locals.tf
@@ -0,0 +1,128 @@
locals {
# Tags for the provisioned resource
freeform_tags = {
datadog-terraform = "true"
}
home_region_name = [for region in data.oci_identity_regions.all_regions.regions : region.name if region.key == data.oci_identity_tenancy.tenancy_metadata.home_region_key][0]
}

locals {
logging_compartment_ids = toset(split(",", var.logging_compartments))

# Parse the content from the external data source
logging_services = jsondecode(data.external.logging_services.result["content"])

# Filter services to exclude those in exclude_services
filtered_services = [
for service in local.logging_services : service
if contains(var.include_services, service.id)
]

# Generate a Cartesian product of compartments and filtered services
logging_targets = flatten([
for compartment_id in local.logging_compartment_ids : [
for service in local.filtered_services : {
compartment_id = compartment_id
service_id = service.id
resource_types = service.resourceTypes
}
]
])
}

locals {
# Combine and group resources by compartment ID (may still contain nested lists)
compartment_resources = {
for compartment_group, resources in module.resourcediscovery :
split("_", compartment_group)[0] => resources.response...
if length(resources.response) > 0
}
}

locals {
/*
This code snippet processes a list of filtered services to create a mapping of service IDs and resource types to their corresponding categories.

Steps:
1. Flatten the `filtered_services` list to create `service_resource_type_list`, which contains objects with combined service ID and resource type as the key, and a list of category names as the value.
2. Create `service_category_map` using `zipmap`, where the keys are the combined service ID and resource type, and the values are the lists of category names.
3. Transform `service_category_map` into `service_map`, where if any category name starts with "all", only those categories are kept; otherwise, all categories are kept.

Input:
local.filtered_services = [
{
id = "service1",
resourceTypes = [
{
name = "type1",
categories = ["cat1", "all-cat2"]
},
{
name = "type2",
categories = ["cat3"]
}
]
},
{
id = "service2",
resourceTypes = [
{
name = "type3",
categories = ["all-cat4", "cat5"]
}
]
}
]

Output:
local.service_map = {
"service1_type1" = ["all-cat2"],
"service1_type2" = ["cat3"],
"service2_type3" = ["all-cat4"]
}
*/
# Flatten filtered_services to get service_id_resource_type and corresponding categories
service_resource_type_list = flatten([
for service in local.filtered_services : [
for rt in service.resourceTypes : {
key = "${service.id}_${rt.name}" # Combine service ID and resource type as the key
categories = [for cat in rt.categories : cat.name] # List of category names
}
]
])

# Create service_category_map using zipmap
service_category_map = zipmap(
[for item in local.service_resource_type_list : item.key], # Keys: service_id_resource_type
[for item in local.service_resource_type_list : item.categories] # Values: category name lists
)

service_map = tomap({
for key, values in local.service_category_map :
key => (
length([for value in values : value if substr(value, 0, 3) == "all"]) > 0 ?
[for value in values : value if substr(value, 0, 3) == "all"] :
values
)
})
}
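
To make the grouping easier to follow, here is a minimal Python sketch of the same transformation using the documented example input; the Terraform locals above are the actual implementation, and this sketch only mirrors them for illustration.

```python
# Example input mirroring local.filtered_services (categories are objects with a "name").
filtered_services = [
    {"id": "service1", "resourceTypes": [
        {"name": "type1", "categories": [{"name": "cat1"}, {"name": "all-cat2"}]},
        {"name": "type2", "categories": [{"name": "cat3"}]},
    ]},
    {"id": "service2", "resourceTypes": [
        {"name": "type3", "categories": [{"name": "all-cat4"}, {"name": "cat5"}]},
    ]},
]

# Steps 1 and 2: build "<service>_<resource-type>" -> [category names].
service_category_map = {
    f"{svc['id']}_{rt['name']}": [c["name"] for c in rt["categories"]]
    for svc in filtered_services
    for rt in svc["resourceTypes"]
}

# Step 3: if any category starts with "all", keep only those; otherwise keep them all.
service_map = {
    key: [v for v in values if v.startswith("all")] or values
    for key, values in service_category_map.items()
}

assert service_map == {
    "service1_type1": ["all-cat2"],
    "service1_type2": ["cat3"],
    "service2_type3": ["all-cat4"],
}
```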

locals {
preexisting_service_log_groups = flatten([
for compartment_id, value in module.logging :
value.details.preexisting_log_groups
])

# Extract service log groups if they are not null
datadog_service_log_groups = [
for compartment_id, value in module.logging :
{
log_group_id = try(value.details.datadog_service_log_group_id, null)
compartment_id = compartment_id
}
if try(value.details.datadog_service_log_group_id, null) != null
]

service_log_groups = toset(concat(local.preexisting_service_log_groups, local.datadog_service_log_groups))

}
15 changes: 15 additions & 0 deletions datadog-logs-oci-orm/logging_services.sh
@@ -0,0 +1,15 @@
#!/bin/bash

output_file="oci_logging_services.json"
Contributor

Thoughts on doing this in Python, where we have more flexibility, e.g. exception handling?

echo "[]" > $output_file # Initialize the output file with an empty JSON array

# Fetch logging services using OCI CLI
response=$(oci logging service list --all --query "data[].{id:id, resourceTypes:\"resource-types\"[].{name:name, categories:categories[].{name:name}}}" --output json)

# Write the response to the output file
echo "$response" > "$output_file"

# Output the response in a valid JSON map for Terraform's external data source
content=$(jq -c . < "$output_file") # Ensure the file's content is compact JSON
Contributor

Can the sample output of this be documented?

Also, it would be more readable to use Python for better handling, including exception handling; if not immediately, then as a follow-up.

rm -f "$output_file"
echo "{\"content\": \"$(echo "$content" | sed 's/"/\\"/g')\"}" # Escape quotes for JSON compatibility
18 changes: 18 additions & 0 deletions datadog-logs-oci-orm/logs-function/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
FROM fnproject/python:3.11-dev AS build-stage
WORKDIR /function
ADD requirements.txt /function/

RUN pip3 install --target /python/ --no-cache --no-cache-dir -r requirements.txt && \
rm -fr ~/.cache/pip /tmp* requirements.txt func.yaml Dockerfile .venv && \
chmod -R o+r /python && \
groupadd --gid 1000 fn && \
adduser --uid 1000 --gid fn fn
ADD . /function/
RUN rm -fr /function/.pip_cache
FROM fnproject/python:3.11
WORKDIR /function
COPY --from=build-stage /python /python
COPY --from=build-stage /function /function
RUN chmod -R o+r /function
ENV PYTHONPATH=/function:/python
ENTRYPOINT ["/python/bin/fdk", "/function/func.py", "handler"]
41 changes: 41 additions & 0 deletions datadog-logs-oci-orm/logs-function/README.md
@@ -0,0 +1,41 @@
# Service Connector Hub function
OCI function used for log forwarding. See the [Datadog documentation](https://docs.datadoghq.com/integrations/oracle_cloud_infrastructure/?tab=serviceconnectorhub#oci-function) for more info.

## Send Test Log Events to Datadog
This guide will help you test sending log events to Datadog. Follow the steps below to install dependencies, configure your API key, and send test log events.

### Step 1: Install Dependencies
First, install the required dependencies listed in the `requirements.txt` file. You can do this using `pip`:

```sh
pip install -r requirements.txt
```

Contributor

Not sure if this should be done as part of this PR, but IMO it is not clear to customers where they should run this (e.g. Cloud Shell?). IMO the docs we currently have could use more hand-holding, for example how one creates a boilerplate Python function.

Contributor Author

For the purpose of this project, this README can be considered applicable only to developers who are making changes to the Python function. That mostly means the Datadog team, but external contributors can benefit from these steps as well. These steps shouldn't be executed from within a customer's OCI environment.


### Step 2: Configure API Key
Before sending log events, you need to add your Datadog API key to the `func.yaml` file. Open `func.yaml` and set the `DATADOG_TOKEN` variable to your API key:

```
DATADOG_TOKEN: your_datadog_api_key_here
```
You can optionally add tags as well by updating the `DATADOG_TAGS` field.

### Step 3: Prepare Input Data
Ensure that the `input.json` file contains the message you want to send. The message should include a `time` field, which you will need to update to be within 18 hours of the current time.
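
For orientation, here is a hypothetical record showing only the fields `func.py` reads; the actual `input.json` in this directory may contain additional fields, and every value below is a placeholder:

```python
# Placeholder record, not the real input.json: only the fields func.py reads are shown.
example_record = {
    "source": "example-subnet",                     # resource emitting the log
    "time": "2025-02-07T12:00:00.000Z",             # must be within 18 hours of now
    "type": "com.oraclecloud.vcn.flowlogs.all",     # com.oraclecloud.{service}.{resource-type}.{category}
    "data": {"message": "test log event"},
    "oracle": {"loggroupid": "example-log-group"},  # "_Audit" would mark an Audit log
}
```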

### Step 4: Send Test Log Events
You can now send test log events using the `make_test_request.py` script. You can pass the optional `--count` parameter to indicate the number of messages to produce; the message will be published that many times. `cd` into the current directory and run:

```commandline
python tests/test_request.py --count=<count>
```
**Notes**
- Ensure that the `time` field in the message in `input.json` is updated to be within 18 hours of the current time before running the script.
- The script will validate that the `--count` parameter is greater than 0 if provided.

By following these steps, you can test sending log events to Datadog.

## Tests

To run tests, cd into the current directory and run:

`python3 -m unittest`
165 changes: 165 additions & 0 deletions datadog-logs-oci-orm/logs-function/func.py
@@ -0,0 +1,165 @@
import io
import os
import json
import logging
import gzip
import requests

logger = logging.getLogger(__name__)

DD_SOURCE = "oci.logs"  # Default source name; each log's source is remapped to its OCI service so it is processed by the correct pipeline in Datadog.
DD_SERVICE = "oci"  # Default service name.
DD_TIMEOUT = 10 * 60  # Timeout (seconds) for the Datadog API call.
DD_BATCH_SIZE = 1000  # Maximum number of logs sent per Datadog API call.
Contributor

Is there a max size that can be sent to the DD logs endpoint? I see that this batch is 1000 logs, but I'm curious whether we might also need to enforce a limit on the number of bytes sent via the API?

Contributor Author

I think the max batch size is 1000 logs, as enforced by Datadog. Since logs are compressed before calling the API, I think the byte limit shouldn't be a problem. Here are the limits: https://docs.datadoghq.com/api/latest/logs/#send-logs

Contributor

The 5 MB byte limit applies to the uncompressed payload. Even if it is sent compressed, it is decompressed at EvP intake and then evaluated for size.

Ideally, we can control the byte limit by ensuring no more than 5 MB is sent through the connector hub.
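
To illustrate that idea, here is a minimal sketch (not part of this PR) of how a byte cap could be layered on top of the existing 1000-log batching; `MAX_UNCOMPRESSED_BYTES` is a hypothetical constant approximating the 5 MB uncompressed intake limit mentioned above.

```python
import json  # json is already imported at the top of func.py

MAX_UNCOMPRESSED_BYTES = 5 * 1024 * 1024  # hypothetical cap based on the 5 MB uncompressed limit


def _split_by_size(logs: list[dict], max_bytes: int = MAX_UNCOMPRESSED_BYTES) -> list[list[dict]]:
    """Greedily split a batch so each chunk stays under max_bytes when JSON-encoded."""
    chunks, current, current_size = [], [], 0
    for log in logs:
        size = len(json.dumps(log).encode())
        if current and current_size + size > max_bytes:
            chunks.append(current)
            current, current_size = [], 0
        current.append(log)
        current_size += size
    if current:
        chunks.append(current)
    return chunks
```

Each chunk could then be passed to `_process` in place of (or in addition to) the fixed-size slices in `handler`.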

REDACTED_FIELDS = [
"data.identity.credentials",
"data.request.headers.Authorization",
"data.request.headers.authorization",
"data.request.headers.X-OCI-LB-PrivateAccessMetadata",
"data.request.headers.opc-principal"
]

def _should_compress_payload() -> bool:
return os.environ.get("DD_COMPRESS", "true").lower() == "true"


def _compress_payload(payload: list[dict]) -> bytes | list[dict]:
Contributor

Might be good to have the return type defined here so it's clear this can return two different types of data.

try:
return gzip.compress(json.dumps(payload).encode())
except Exception as ex:
logger.error("Could not compress payload to gzip", extra={'Exception': ex})
return payload


def _process(body: list[dict]) -> None:
"""
Processes a list of log entries and sends them to the Datadog API.
This function retrieves the Datadog endpoint URL and token from environment variables,
processes each log entry in the provided list, compresses the payload, and sends it
to the Datadog API.

Args:
body (list[dict]): A list of log entries, where each log entry is represented as a dictionary.

Raises:
KeyError: If the required environment variables 'DATADOG_HOST' or 'DATADOG_TOKEN' are not set.
Exception: If there is an error during the API request or payload processing.
"""
try:
dd_host = os.environ['DATADOG_HOST']
dd_token = os.environ['DATADOG_TOKEN']
except KeyError:
err_msg = (
"Could not find environment variables, please ensure DATADOG_HOST and DATADOG_TOKEN "
"are set as environment variables."
)
logger.error(err_msg)
raise KeyError(err_msg)

try:
dd_url = f"https://{dd_host}/api/v2/logs"
payload = [_redact_sensitive_data(_process_single_log(b)) for b in body]
headers = {
"Content-type": "application/json",
"DD-API-KEY": dd_token
}
compressed_payload = payload
if _should_compress_payload():
compressed_payload = _compress_payload(payload=payload)

if isinstance(compressed_payload, bytes):
headers["Content-Encoding"] = "gzip"
res = requests.post(dd_url, data=compressed_payload, headers=headers, timeout=DD_TIMEOUT)
else:
res = requests.post(dd_url, json=compressed_payload, headers=headers, timeout=DD_TIMEOUT)

logger.info(res.text)
except Exception as ex:
logger.exception(ex)

def _get_oci_source_name(body: dict) -> str:
"""
Returns the source name for the log entry.
This function determines whether the log is an Audit log and, if not, which source it comes from.

Args:
body (dict): A log entry represented as a dictionary.

Returns:
str: The source name for the log entry.
"""
oracle = body.get("oracle")
logtype = body.get("type")

if oracle is not None and oracle.get("loggroupid") == "_Audit":
return "oci.audit"

if logtype:
# logtype is of format com.oraclecloud.{service}.{resource-type}.{category}
split_logtype = logtype.split(".")
if len(split_logtype) >= 3:
return "oci." + split_logtype[2]

return DD_SOURCE

def _redact_sensitive_data(body: dict) -> dict:
def redact_field(obj, field_path):
keys = field_path.split(".")
for key in keys[:-1]:
if key in obj and isinstance(obj[key], dict):
obj = obj[key]
else:
return # Stop if the path does not exist or isn't a dict
# Redact the final key if it exists and is not None
if keys[-1] in obj and obj[keys[-1]] is not None:
obj[keys[-1]] = "******"

for field_path in REDACTED_FIELDS:
redact_field(body, field_path)

return body
def _process_single_log(body: dict) -> dict:
modified_body = _redact_sensitive_data(body)

data = modified_body.get("data", {})
source = modified_body.get("source")
time = modified_body.get("time")
logtype = modified_body.get("type")
oracle = modified_body.get("oracle")
ddsource = _get_oci_source_name(modified_body)

payload = {
"source": source,
"timestamp": time,
"data": data,
"ddsource": ddsource,
"service": DD_SERVICE,
"type": logtype,
"oracle": oracle
}

dd_tags = os.environ.get('DATADOG_TAGS', '')
if dd_tags:
payload['ddtags'] = dd_tags

return payload


def handler(ctx, data: io.BytesIO = None) -> None:
"""
This function receives the logging JSON and invokes the Datadog endpoint
for ingesting logs. https://docs.cloud.oracle.com/en-us/iaas/Content/Logging/Reference/top_level_logging_format.htm#top_level_logging_format
If this function is invoked with more than one log, it splits them into
batches of DD_BATCH_SIZE and sends each batch to the Datadog endpoint.
Contributor

Are the logs ingested one by one? It looks like they are ingested in batches of 1000?

Contributor Author

Yes, they are ingested in batches of 1000. When the function is invoked with more than 1000 logs to transmit, a single invocation breaks them into batches of 1000 and makes multiple API calls to Datadog.

Datadog Logs API: https://docs.datadoghq.com/api/latest/logs/#send-logs
"""
try:
body = json.loads(data.getvalue())
if isinstance(body, list):
for i in range(0, len(body), DD_BATCH_SIZE):
batch = body[i:i + DD_BATCH_SIZE]
_process(batch)
else:
_process([body])
except Exception as ex:
logger.exception(ex)