Commit
chore(anomaly detection): replace placeholder get_historical_data with seer call (#76977)

Implement the seer call logic in `get_historical_anomaly_data_from_seer`
mifu67 committed Sep 5, 2024
1 parent eb403aa commit ca59242
Showing 6 changed files with 373 additions and 74 deletions.
152 changes: 141 additions & 11 deletions src/sentry/seer/anomaly_detection/get_historical_anomalies.py
@@ -1,24 +1,154 @@
import logging
from datetime import datetime

from django.conf import settings
from urllib3.exceptions import MaxRetryError, TimeoutError

from sentry.conf.server import SEER_ANOMALY_DETECTION_ENDPOINT_URL
from sentry.incidents.models.alert_rule import AlertRule, AlertRuleStatus
from sentry.models.project import Project
from sentry.net.http import connection_from_url
from sentry.seer.anomaly_detection.types import AnomalyDetectionConfig, DetectAnomaliesRequest
from sentry.seer.anomaly_detection.utils import (
    fetch_historical_data,
    format_historical_data,
    translate_direction,
)
from sentry.seer.signed_seer_api import make_signed_seer_api_request
from sentry.snuba.models import SnubaQuery
from sentry.snuba.utils import get_dataset
from sentry.utils import json
from sentry.utils.json import JSONDecodeError

logger = logging.getLogger(__name__)

seer_anomaly_detection_connection_pool = connection_from_url(
    settings.SEER_ANOMALY_DETECTION_URL,
    timeout=settings.SEER_ANOMALY_DETECTION_TIMEOUT,
)


def get_historical_anomaly_data_from_seer(
    alert_rule: AlertRule, project: Project, start_string: str, end_string: str
) -> list | None:
    """
    Send time series data to Seer and return anomaly detection response.
    """
    if alert_rule.status == AlertRuleStatus.NOT_ENOUGH_DATA.value:
        return []
    # don't think this can happen but mypy is yelling
    if not alert_rule.snuba_query:
        logger.error(
            "No snuba query associated with alert rule",
            extra={
                "alert_rule_id": alert_rule.id,
            },
        )
        return None
    subscription = alert_rule.snuba_query.subscriptions.first()
    # same deal as above
    if not subscription:
        logger.error(
            "No subscription associated with alert rule",
            extra={"alert_rule_id": alert_rule.id, "snuba_query_id": alert_rule.snuba_query_id},
        )
        return None
    snuba_query = SnubaQuery.objects.get(id=alert_rule.snuba_query_id)
    dataset = get_dataset(snuba_query.dataset)
    window_min = int(snuba_query.time_window / 60)
    start = datetime.fromisoformat(start_string)
    end = datetime.fromisoformat(end_string)
    historical_data = fetch_historical_data(
        alert_rule=alert_rule, snuba_query=snuba_query, project=project, start=start, end=end
    )

    if not historical_data:
        logger.error(
            "No historical data available",
            extra={
                "alert_rule_id": alert_rule.id,
                "snuba_query_id": alert_rule.snuba_query_id,
                "project_id": project.id,
                "start": start,
                "end": end,
            },
        )
        return None
    formatted_data = format_historical_data(historical_data, dataset)
    if (
        not alert_rule.sensitivity
        or not alert_rule.seasonality
        or alert_rule.threshold_type is None
        or alert_rule.organization is None
    ):
        # this won't happen because we've already gone through the serializer, but mypy insists
        logger.error("Missing required configuration for an anomaly detection alert")
        return None

    anomaly_detection_config = AnomalyDetectionConfig(
        time_period=window_min,
        sensitivity=alert_rule.sensitivity,
        direction=translate_direction(alert_rule.threshold_type),
        expected_seasonality=alert_rule.seasonality,
    )
    body = DetectAnomaliesRequest(
        organization_id=alert_rule.organization.id,
        project_id=project.id,
        config=anomaly_detection_config,
        context=formatted_data,
    )
    try:
        response = make_signed_seer_api_request(
            seer_anomaly_detection_connection_pool,
            SEER_ANOMALY_DETECTION_ENDPOINT_URL,
            json.dumps(body).encode("utf-8"),
        )
    except (TimeoutError, MaxRetryError):
        logger.exception(
            "Timeout error when hitting anomaly detection endpoint",
            extra={
                "subscription_id": subscription.id,
                "dataset": alert_rule.snuba_query.dataset,
                "organization_id": alert_rule.organization.id,
                "project_id": project.id,
                "alert_rule_id": alert_rule.id,
            },
        )
        return None

    if response.status != 200:
        logger.error(
            f"Received {response.status} when calling Seer endpoint {SEER_ANOMALY_DETECTION_ENDPOINT_URL}.",  # noqa
            extra={"response_data": response.data},
        )
        return None

    try:
        results = json.loads(response.data.decode("utf-8")).get("timeseries")
        if not results:
            logger.warning(
                "Seer anomaly detection response returned no potential anomalies",
                extra={
                    "ad_config": anomaly_detection_config,
                    "context": formatted_data,
                    "response_data": response.data,
                    "response_code": response.status,
                },
            )
            return None
        return results
    except (
        AttributeError,
        UnicodeError,
        JSONDecodeError,
    ):
        logger.exception(
            "Failed to parse Seer anomaly detection response",
            extra={
                "ad_config": anomaly_detection_config,
                "context": formatted_data,
                "response_data": response.data,
                "response_code": response.status,
            },
        )
        return None
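
For context, a minimal sketch of how the reworked helper might be exercised. The rule and project lookups and the date range below are hypothetical; only the return semantics come from the code above.

# Hypothetical usage sketch -- the IDs and date range are illustrative only.
from datetime import datetime, timedelta, timezone

from sentry.incidents.models.alert_rule import AlertRule
from sentry.models.project import Project
from sentry.seer.anomaly_detection.get_historical_anomalies import (
    get_historical_anomaly_data_from_seer,
)

alert_rule = AlertRule.objects.get(id=123)  # assumed anomaly detection alert rule
project = Project.objects.get(id=456)
end = datetime.now(timezone.utc)
start = end - timedelta(days=7)

# Returns Seer's "timeseries" list, [] for rules still in NOT_ENOUGH_DATA,
# or None on any failure (missing config, timeout, non-200, bad payload).
anomalies = get_historical_anomaly_data_from_seer(
    alert_rule, project, start.isoformat(), end.isoformat()
)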
49 changes: 1 addition & 48 deletions src/sentry/seer/anomaly_detection/store_data.py
@@ -3,28 +3,24 @@

from django.conf import settings
from django.core.exceptions import ValidationError
from django.utils import timezone
from urllib3.exceptions import MaxRetryError, TimeoutError

from sentry.conf.server import SEER_ANOMALY_DETECTION_STORE_DATA_URL
from sentry.incidents.models.alert_rule import AlertRule, AlertRuleStatus
from sentry.models.project import Project
from sentry.net.http import connection_from_url
from sentry.search.events.types import SnubaParams
from sentry.seer.anomaly_detection.types import (
AlertInSeer,
AnomalyDetectionConfig,
StoreDataRequest,
)
from sentry.seer.anomaly_detection.utils import (
fetch_historical_data,
format_historical_data,
get_crash_free_historical_data,
translate_direction,
)
from sentry.seer.signed_seer_api import make_signed_seer_api_request
from sentry.snuba import metrics_performance
from sentry.snuba.models import SnubaQuery
from sentry.snuba.referrer import Referrer
from sentry.snuba.utils import get_dataset
from sentry.utils import json
from sentry.utils.snuba import SnubaTSResult
@@ -122,46 +118,3 @@ def send_historical_data_to_seer(alert_rule: AlertRule, project: Project) -> AlertRuleStatus:
    if data_end_time - data_start_time < timedelta(days=MIN_DAYS):
        return AlertRuleStatus.NOT_ENOUGH_DATA
    return AlertRuleStatus.PENDING


def fetch_historical_data(
    alert_rule: AlertRule, snuba_query: SnubaQuery, project: Project
) -> SnubaTSResult | None:
    """
    Fetch 28 days of historical data from Snuba to pass to Seer to build the anomaly detection model
    """
    # TODO: if we can pass the existing timeseries data we have on the front end along here, we can shorten
    # the time period we query and combine the data
    end = timezone.now()
    start = end - timedelta(days=NUM_DAYS)
    granularity = snuba_query.time_window

    dataset_label = snuba_query.dataset
    if dataset_label == "events":
        # DATASET_OPTIONS expects the name 'errors'
        dataset_label = "errors"
    dataset = get_dataset(dataset_label)

    if not project or not dataset or not alert_rule.organization:
        return None

    if dataset == metrics_performance:
        return get_crash_free_historical_data(
            start, end, project, alert_rule.organization, granularity
        )

    else:
        historical_data = dataset.timeseries_query(
            selected_columns=[snuba_query.aggregate],
            query=snuba_query.query,
            snuba_params=SnubaParams(
                organization=alert_rule.organization,
                projects=[project],
                start=start,
                end=end,
            ),
            rollup=granularity,
            referrer=Referrer.ANOMALY_DETECTION_HISTORICAL_DATA_QUERY.value,
            zerofill_results=True,
        )
        return historical_data
65 changes: 63 additions & 2 deletions src/sentry/seer/anomaly_detection/utils.py
@@ -1,15 +1,20 @@
from datetime import datetime, timedelta
from typing import Any

from django.utils import timezone
from django.utils.datastructures import MultiValueDict

from sentry import release_health
from sentry.incidents.models.alert_rule import AlertRule, AlertRuleThresholdType
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.search.events.types import SnubaParams
from sentry.seer.anomaly_detection.types import TimeSeriesPoint
from sentry.snuba import metrics_performance
from sentry.snuba.models import SnubaQuery
from sentry.snuba.referrer import Referrer
from sentry.snuba.sessions_v2 import QueryDefinition
from sentry.snuba.utils import get_dataset
from sentry.utils.snuba import SnubaTSResult


@@ -101,3 +106,59 @@ def format_historical_data(data: SnubaTSResult, dataset: Any) -> list[TimeSeriesPoint]:
        ts_point = TimeSeriesPoint(timestamp=datum.get("time"), value=datum.get("count", 0))
        formatted_data.append(ts_point)
    return formatted_data


def fetch_historical_data(
    alert_rule: AlertRule,
    snuba_query: SnubaQuery,
    project: Project,
    start: datetime | None = None,
    end: datetime | None = None,
) -> SnubaTSResult | None:
    """
    Fetch 28 days of historical data from Snuba to pass to Seer to build the anomaly detection model
    """
    # TODO: if we can pass the existing timeseries data we have on the front end along here, we can shorten
    # the time period we query and combine the data
    is_store_data_request = False
    if end is None:
        is_store_data_request = True
        end = timezone.now()
    # doing it this way to suppress typing errors
    if start is None:
        start = end - timedelta(days=NUM_DAYS)
    granularity = snuba_query.time_window

    dataset_label = snuba_query.dataset
    if dataset_label == "events":
        # DATASET_OPTIONS expects the name 'errors'
        dataset_label = "errors"
    dataset = get_dataset(dataset_label)

    if not project or not dataset or not alert_rule.organization:
        return None

    if dataset == metrics_performance:
        return get_crash_free_historical_data(
            start, end, project, alert_rule.organization, granularity
        )

    else:
        historical_data = dataset.timeseries_query(
            selected_columns=[snuba_query.aggregate],
            query=snuba_query.query,
            snuba_params=SnubaParams(
                organization=alert_rule.organization,
                projects=[project],
                start=start,
                end=end,
            ),
            rollup=granularity,
            referrer=(
                Referrer.ANOMALY_DETECTION_HISTORICAL_DATA_QUERY.value
                if is_store_data_request
                else Referrer.ANOMALY_DETECTION_RETURN_HISTORICAL_ANOMALIES.value
            ),
            zerofill_results=True,
        )
        return historical_data
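
The optional start/end double as a mode switch: omitting them marks a store-data request over the trailing NUM_DAYS window, while explicit bounds mark a historical-anomalies query, and the Snuba referrer is tagged accordingly. A sketch of both call shapes (the object lookups are illustrative only):

# Hypothetical call shapes -- the lookups below are illustrative only.
from datetime import datetime, timezone

from sentry.incidents.models.alert_rule import AlertRule
from sentry.seer.anomaly_detection.utils import fetch_historical_data

alert_rule = AlertRule.objects.get(id=123)
snuba_query = alert_rule.snuba_query
project = alert_rule.projects.first()  # assumed to have at least one project

# Store-data path: no bounds, so the helper queries the trailing NUM_DAYS
# window under Referrer.ANOMALY_DETECTION_HISTORICAL_DATA_QUERY.
result = fetch_historical_data(
    alert_rule=alert_rule, snuba_query=snuba_query, project=project
)

# Historical-anomalies path: explicit bounds, queried under
# Referrer.ANOMALY_DETECTION_RETURN_HISTORICAL_ANOMALIES.
result = fetch_historical_data(
    alert_rule=alert_rule,
    snuba_query=snuba_query,
    project=project,
    start=datetime(2024, 8, 1, tzinfo=timezone.utc),
    end=datetime(2024, 8, 29, tzinfo=timezone.utc),
)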
3 changes: 3 additions & 0 deletions src/sentry/snuba/referrer.py
@@ -13,6 +13,9 @@ class Referrer(Enum):
    ALERTRULESERIALIZER_TEST_QUERY_PRIMARY = "alertruleserializer.test_query.primary"
    ALERTRULESERIALIZER_TEST_QUERY = "alertruleserializer.test_query"
    ANOMALY_DETECTION_HISTORICAL_DATA_QUERY = "anomaly_detection_historical_data_query"
    ANOMALY_DETECTION_RETURN_HISTORICAL_ANOMALIES = (
        "anomaly_detection_get_historical_anomalies_query"
    )
    API_ALERTS_ALERT_RULE_CHART_METRICS_ENHANCED = "api.alerts.alert-rule-chart.metrics-enhanced"
    API_ALERTS_ALERT_RULE_CHART = "api.alerts.alert-rule-chart"
    API_ALERTS_CHARTCUTERIE = "api.alerts.chartcuterie"