Skip to content

Commit

Permalink
fix: Add incident resolution logic for enriched alerts (#3185)
Browse files Browse the repository at this point in the history
  • Loading branch information
VladimirFilonov authored Jan 29, 2025
1 parent d90d774 commit f4898f7
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 5 deletions.
29 changes: 25 additions & 4 deletions keep/api/routes/alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import JSONResponse
from pusher import Pusher
from sqlmodel import Session

from keep.api.arq_pool import get_pool
from keep.api.bl.enrichments_bl import EnrichmentsBl
from keep.api.consts import KEEP_ARQ_QUEUE_BASIC
from keep.api.core.config import config
from keep.api.core.db import get_alert_audit as get_alert_audit_db
from keep.api.core.db import get_alert_audit as get_alert_audit_db, enrich_alerts_with_incidents, \
is_all_alerts_resolved, get_session
from keep.api.core.db import (
get_alerts_by_fingerprint,
get_alerts_metrics_by_provider,
Expand All @@ -33,10 +35,11 @@
AlertDto,
DeleteRequestBody,
EnrichAlertRequestBody,
UnEnrichAlertRequestBody,
UnEnrichAlertRequestBody, IncidentStatus,
)
from keep.api.models.alert_audit import AlertAuditDto
from keep.api.models.db.alert import ActionType
from keep.api.models.db.rule import ResolveOn
from keep.api.models.search_alert import SearchAlertsRequest
from keep.api.models.time_stamp import TimeStampFilter
from keep.api.tasks.process_event_task import process_event
Expand Down Expand Up @@ -555,11 +558,13 @@ def enrich_alert(
dispose_on_new_alert: Optional[bool] = Query(
False, description="Dispose on new alert"
),
session: Session = Depends(get_session),
) -> dict[str, str]:
return _enrich_alert(
enrich_data,
authenticated_entity=authenticated_entity,
dispose_on_new_alert=dispose_on_new_alert,
session=session
)


Expand All @@ -569,6 +574,7 @@ def _enrich_alert(
IdentityManagerFactory.get_auth_verifier(["write:alert"])
),
dispose_on_new_alert: Optional[bool] = False,
session: Optional[Session] = None
) -> dict[str, str]:
tenant_id = authenticated_entity.tenant_id
logger.info(
Expand All @@ -580,9 +586,10 @@ def _enrich_alert(
)

should_run_workflow = False
should_check_incidents_resolution = False

try:
enrichement_bl = EnrichmentsBl(tenant_id)
enrichement_bl = EnrichmentsBl(tenant_id, db=session)
# Shahar: TODO, change to the specific action type, good enough for now
if (
"status" in enrich_data.enrichments
Expand All @@ -603,6 +610,8 @@ def _enrich_alert(
)
action_description = f"Alert status was changed to {enrich_data.enrichments['status']} by API `{authenticated_entity.api_key_name}`"
should_run_workflow = True
if enrich_data.enrichments["status"] == "resolved":
should_check_incidents_resolution = True
elif "note" in enrich_data.enrichments and enrich_data.enrichments["note"]:
action_type = ActionType.COMMENT
action_description = f"Comment added by {authenticated_entity.email} - {enrich_data.enrichments['note']}"
Expand Down Expand Up @@ -630,7 +639,7 @@ def _enrich_alert(
)
return {"status": "failed"}

enriched_alerts_dto = convert_db_alerts_to_dto_alerts(alert)
enriched_alerts_dto = convert_db_alerts_to_dto_alerts(alert, session=session)
# push the enriched alert to the elasticsearch
try:
logger.info("Pushing enriched alert to elasticsearch")
Expand Down Expand Up @@ -666,6 +675,18 @@ def _enrich_alert(
workflow_manager.insert_events(
tenant_id=tenant_id, events=[enriched_alerts_dto[0]]
)

if should_check_incidents_resolution:
enrich_alerts_with_incidents(tenant_id=tenant_id, alerts=alert)
for incident in alert[0]._incidents:
if incident.resolve_on == ResolveOn.ALL.value and is_all_alerts_resolved(
incident=incident,
session=session
):
incident.status = IncidentStatus.RESOLVED.value
session.add(incident)
session.commit()

return {"status": "ok"}

except Exception as e:
Expand Down
64 changes: 63 additions & 1 deletion tests/test_incidents.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
remove_alerts_to_incident_by_incident_id,
)
from keep.api.core.db_utils import get_json_extract_field
from keep.api.core.dependencies import SINGLE_TENANT_UUID
from keep.api.core.dependencies import SINGLE_TENANT_UUID, SINGLE_TENANT_EMAIL
from keep.api.models.alert import (
AlertSeverity,
AlertStatus,
Expand All @@ -33,6 +33,8 @@
from keep.api.models.db.alert import Alert, LastAlertToIncident, Incident, NULL_FOR_DELETED_AT
from keep.api.models.db.tenant import Tenant
from keep.api.utils.enrichment_helpers import convert_db_alerts_to_dto_alerts
from keep.identitymanager.authenticatedentity import AuthenticatedEntity
from keep.identitymanager.rbac import Admin
from tests.conftest import ElasticClientMock, PusherMock, WorkflowManagerMock
from tests.fixtures.client import client, test_app # noqa

Expand Down Expand Up @@ -451,6 +453,66 @@ def test_incident_status_change(
)


@pytest.mark.parametrize("test_app", ["NO_AUTH"], indirect=True)
def test_incident_status_change_manual_alert_enrichment(
db_session, client, test_app, create_alert
):
# Create an alert and add it to an incident
create_alert(
"alert-test",
AlertStatus.FIRING,
datetime.utcnow(),
{"severity": AlertSeverity.CRITICAL.value},
)
alert = db_session.query(Alert).filter_by(fingerprint="alert-test").first()
incident = create_incident_from_dict(
SINGLE_TENANT_UUID,
{"user_generated_name": "Test Incident", "user_summary": "Test Incident Summary"},
)
add_alerts_to_incident_by_incident_id(
SINGLE_TENANT_UUID, incident.id, [alert.fingerprint], session=db_session
)

# Ensure incident has one firing alert
incident = get_incident_by_id(SINGLE_TENANT_UUID, incident.id, with_alerts=True, session=db_session)
assert incident.status == IncidentStatus.FIRING.value
assert len(incident._alerts) == 1
assert incident._alerts[0].event["status"] == AlertStatus.FIRING.value

with patch(
"keep.identitymanager.identity_managers.noauth.noauth_authverifier.NoAuthVerifier._verify_api_key",
return_value=AuthenticatedEntity(
tenant_id=SINGLE_TENANT_UUID,
email=SINGLE_TENANT_EMAIL,
api_key_name=SINGLE_TENANT_UUID,
role=Admin.get_name(),
)
):
# Manually enrich the alert to change its status to resolved
client.post(
f"/alerts/enrich?dispose_on_new_alert=true",
headers={"x-api-key": "some-key"},
json={
"enrichments": {
"status": AlertStatus.RESOLVED.value,
"dismissed": False,
"dismissUntil": ""
},
"fingerprint": incident._alerts[0].fingerprint
}
)

# Refresh incident data and verify status change
db_session.expire_all()
incident = get_incident_by_id(SINGLE_TENANT_UUID, incident.id, with_alerts=True, session=db_session)
assert len(incident._alerts) == 1

alert_dtos = convert_db_alerts_to_dto_alerts(incident._alerts)

assert alert_dtos[0].status == AlertStatus.RESOLVED.value
assert incident.status == IncidentStatus.RESOLVED.value


@pytest.mark.parametrize("test_app", ["NO_AUTH"], indirect=True)
def test_incident_metadata(
db_session, client, test_app, setup_stress_alerts_no_elastic
Expand Down

0 comments on commit f4898f7

Please sign in to comment.