Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(integrations): metrics for on-call integration SLOs #78660

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/sentry/integrations/on_call/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from collections.abc import Mapping
from enum import Enum
from typing import Any

from attr import dataclass

from sentry.integrations.opsgenie.spec import OpsgenieOnCallSpec
from sentry.integrations.utils.metrics import EventLifecycleMetric, EventLifecycleOutcome
from sentry.models.organization import Organization
from sentry.organizations.services.organization import RpcOrganization
from sentry.users.models import User
from sentry.users.services.user import RpcUser


class OnCallInteractionType(Enum):
    """
    A way in which a user can interact with Sentry through an on-call app.

    ``str(member)`` returns the lower-cased value of the member.
    """

    # TODO: add the interactions
    # General interactions
    ADD_KEY = "ADD_KEY"
    POST_INSTALL = "POST_INSTALL"
    # Interacting with external alerts
    CREATE = "CREATE"  # create an alert in Opsgenie/Pagerduty
    RESOLVE = "RESOLVE"  # resolve an alert in Opsgenie/Pagerduty
    SEND_NOTIFICATION = "SEND_NOTIFICATION"

    # Opsgenie only
    VERIFY_KEYS = "VERIFY_KEYS"
    # Alias of VERIFY_KEYS (same value, so Enum treats it as an alias rather
    # than a new member). The Opsgenie action form looks this interaction up
    # under the singular name; without the alias that lookup raises
    # AttributeError.
    VERIFY_KEY = VERIFY_KEYS
    MIGRATE_PLUGIN = "MIGRATE_PLUGIN"

    def __str__(self) -> str:
        return self.value.lower()


@dataclass
class OnCallInteractionEvent(EventLifecycleMetric):
    """
    One user interaction with Sentry through an on-call app, to be recorded as a metric.
    """

    interaction_type: OnCallInteractionType
    spec: OpsgenieOnCallSpec  # TODO: also add pagerduty oncall spec, or make a common spec class to inherit

    # Optional context; only read when building the extras mapping
    user: User | RpcUser | None = None
    organization: Organization | RpcOrganization | None = None

    def get_key(self, outcome: EventLifecycleOutcome) -> str:
        """Build the key from the spec's provider slug, the interaction type and the outcome."""
        provider = self.spec.provider_slug
        interaction = str(self.interaction_type)
        return self.get_standard_key(
            domain="on_call",
            integration_name=provider,
            interaction_type=interaction,
            outcome=outcome,
        )

    def get_extras(self) -> Mapping[str, Any]:
        """Return the user and organization ids; each is ``None`` when its object is falsy/unset."""
        user_id = None
        if self.user:
            user_id = self.user.id

        organization_id = None
        if self.organization:
            organization_id = self.organization.id

        return {"user_id": user_id, "organization_id": organization_id}
39 changes: 21 additions & 18 deletions src/sentry/integrations/opsgenie/actions/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from django import forms
from django.utils.translation import gettext_lazy as _

from sentry.integrations.opsgenie.integration import OpsgenieIntegration
from sentry.integrations.on_call.metrics import OnCallInteractionType
from sentry.integrations.opsgenie.integration import OpsgenieIntegration, record_event
from sentry.integrations.opsgenie.utils import get_team
from sentry.integrations.services.integration import integration_service
from sentry.integrations.services.integration.model import (
Expand Down Expand Up @@ -100,24 +101,26 @@ def _validate_team(self, team_id: str | None, integration_id: int | None) -> Non
code="invalid_integration",
params=params,
)
team_status = self._get_team_status(
team_id=team_id, integration=integration, org_integration=org_integration
)
if team_status == INVALID_TEAM:
raise forms.ValidationError(
_('The team "%(team)s" does not belong to the %(account)s Opsgenie account.'),
code="invalid_team",
params=params,
)
elif team_status == INVALID_KEY:
raise forms.ValidationError(
_(
'The provided API key is invalid. Please make sure that the Opsgenie API \
key is an integration key of type "Sentry" that has configuration access.'
),
code="invalid_key",
params=params,
with record_event(OnCallInteractionType.VERIFY_KEY).capture():
team_status = self._get_team_status(
team_id=team_id, integration=integration, org_integration=org_integration
)
# TODO: add failure reasons to lifecycle record failure
if team_status == INVALID_TEAM:
raise forms.ValidationError(
_('The team "%(team)s" does not belong to the %(account)s Opsgenie account.'),
code="invalid_team",
params=params,
)
elif team_status == INVALID_KEY:
raise forms.ValidationError(
_(
'The provided API key is invalid. Please make sure that the Opsgenie API \
key is an integration key of type "Sentry" that has configuration access.'
),
code="invalid_key",
params=params,
)

def clean(self) -> dict[str, Any] | None:
cleaned_data = super().clean()
Expand Down
40 changes: 23 additions & 17 deletions src/sentry/integrations/opsgenie/actions/notification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@

import sentry_sdk

from sentry.integrations.on_call.metrics import OnCallInteractionType
from sentry.integrations.opsgenie.actions import OpsgenieNotifyTeamForm
from sentry.integrations.opsgenie.client import OPSGENIE_DEFAULT_PRIORITY, OpsgeniePriority
from sentry.integrations.opsgenie.integration import record_event
from sentry.integrations.opsgenie.utils import get_team
from sentry.integrations.services.integration import integration_service
from sentry.rules.actions import IntegrationEventAction
Expand Down Expand Up @@ -69,23 +71,27 @@ def send_notification(event, futures):
except Exception as e:
sentry_sdk.capture_exception(e)
return
try:
rules = [f.rule for f in futures]
resp = client.send_notification(
data=event, priority=priority, rules=rules, notification_uuid=notification_uuid
)
except ApiError as e:
logger.info(
"rule.fail.opsgenie_notification",
extra={
"error": str(e),
"team_name": team["team"],
"team_id": team["id"],
"project_id": event.project_id,
"event_id": event.event_id,
},
)
raise
with record_event(OnCallInteractionType.SEND_NOTIFICATION).capture():
try:
rules = [f.rule for f in futures]
resp = client.send_notification(
data=event,
priority=priority,
rules=rules,
notification_uuid=notification_uuid,
)
except ApiError as e:
logger.info(
"rule.fail.opsgenie_notification",
extra={
"error": str(e),
"team_name": team["team"],
"team_id": team["id"],
"project_id": event.project_id,
"event_id": event.event_id,
},
)
raise

logger.info(
"rule.success.opsgenie_notification",
Expand Down
9 changes: 8 additions & 1 deletion src/sentry/integrations/opsgenie/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from sentry.eventstore.models import Event, GroupEvent
from sentry.integrations.client import ApiClient
from sentry.integrations.models.integration import Integration
from sentry.integrations.on_call.metrics import OnCallInteractionType
from sentry.integrations.opsgenie.integration import record_event
from sentry.integrations.services.integration.model import RpcIntegration
from sentry.models.group import Group
from sentry.shared_integrations.client.base import BaseApiResponseX
Expand Down Expand Up @@ -97,6 +99,7 @@ def send_notification(
notification_uuid: str | None = None,
):
headers = self._get_auth_headers()
interaction_type = OnCallInteractionType.CREATE
if isinstance(data, (Event, GroupEvent)):
group = data.group
event = data
Expand All @@ -111,6 +114,7 @@ def send_notification(
else:
# if we're acknowledging the alert—meaning that the Sentry alert was resolved
if data.get("identifier"):
interaction_type = OnCallInteractionType.RESOLVE
alias = data["identifier"]
resp = self.post(
f"/alerts/{alias}/acknowledge",
Expand All @@ -121,5 +125,8 @@ def send_notification(
return resp
# this is a metric alert
payload = data
resp = self.post("/alerts", data=payload, headers=headers)
with record_event(interaction_type).capture() as lifecycle:
resp = self.post("/alerts", data=payload, headers=headers)
if resp.status_code >= 400:
lifecycle.record_failure()
return resp
99 changes: 54 additions & 45 deletions src/sentry/integrations/opsgenie/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
)
from sentry.integrations.models.integration import Integration
from sentry.integrations.models.organization_integration import OrganizationIntegration
from sentry.integrations.on_call.metrics import OnCallInteractionEvent, OnCallInteractionType
from sentry.integrations.opsgenie.spec import OpsgenieOnCallSpec
from sentry.integrations.opsgenie.tasks import migrate_opsgenie_plugin
from sentry.organizations.services.organization import RpcOrganizationSummary
from sentry.pipeline import PipelineView
Expand Down Expand Up @@ -74,6 +76,10 @@
}


def record_event(event: OnCallInteractionType):
    """Build an ``OnCallInteractionEvent`` for ``event`` bound to the Opsgenie spec."""
    opsgenie_spec = OpsgenieOnCallSpec()
    return OnCallInteractionEvent(event, opsgenie_spec)


class InstallationForm(forms.Form):
base_url = forms.ChoiceField(
label=_("Base URL"),
Expand Down Expand Up @@ -180,50 +186,52 @@ def update_organization_config(self, data: MutableMapping[str, Any]) -> None:
team["id"] = str(self.org_integration.id) + "-" + team["team"]

invalid_keys = []
for team in teams:
# skip if team, key pair already exist in config
if (team["team"], team["integration_key"]) in existing_team_key_pairs:
continue

integration_key = team["integration_key"]

# validate integration keys
client = OpsgenieClient(
integration=integration,
integration_key=integration_key,
)
# call an API to test the integration key
try:
client.get_alerts()
except ApiError as e:
logger.info(
"opsgenie.authorization_error",
extra={"error": str(e), "status_code": e.code},
with record_event(OnCallInteractionType.VERIFY_KEYS).capture():
for team in teams:
# skip if team, key pair already exist in config
if (team["team"], team["integration_key"]) in existing_team_key_pairs:
continue

integration_key = team["integration_key"]

# validate integration keys
client = OpsgenieClient(
integration=integration,
integration_key=integration_key,
)
if e.code == 429:
raise ApiRateLimitedError(
"Too many requests. Please try updating one team/key at a time."
# call an API to test the integration key
try:
client.get_alerts()
except ApiError as e:
logger.info(
"opsgenie.authorization_error",
extra={"error": str(e), "status_code": e.code},
)
elif e.code == 401:
invalid_keys.append(integration_key)
pass
elif e.json and e.json.get("message"):
raise ApiError(e.json["message"])
else:
raise

if invalid_keys:
raise ApiUnauthorized(f"Invalid integration key: {str(invalid_keys)}")
if e.code == 429:
raise ApiRateLimitedError(
"Too many requests. Please try updating one team/key at a time."
)
elif e.code == 401:
invalid_keys.append(integration_key)
pass
elif e.json and e.json.get("message"):
raise ApiError(e.json["message"])
else:
raise

if invalid_keys:
raise ApiUnauthorized(f"Invalid integration key: {str(invalid_keys)}")

return super().update_organization_config(data)

def schedule_migrate_opsgenie_plugin(self):
migrate_opsgenie_plugin.apply_async(
kwargs={
"integration_id": self.model.id,
"organization_id": self.organization_id,
}
)
with record_event(OnCallInteractionType.MIGRATE_PLUGIN).capture():
migrate_opsgenie_plugin.apply_async(
kwargs={
"integration_id": self.model.id,
"organization_id": self.organization_id,
}
)


class OpsgenieIntegrationProvider(IntegrationProvider):
Expand Down Expand Up @@ -256,14 +264,15 @@ def post_install(
organization: RpcOrganizationSummary,
extra: Any | None = None,
) -> None:
try:
org_integration = OrganizationIntegration.objects.get(
integration=integration, organization_id=organization.id
)
with record_event(OnCallInteractionType.POST_INSTALL).capture():
try:
org_integration = OrganizationIntegration.objects.get(
integration=integration, organization_id=organization.id
)

except OrganizationIntegration.DoesNotExist:
logger.exception("The Opsgenie post_install step failed.")
return
except OrganizationIntegration.DoesNotExist:
logger.exception("The Opsgenie post_install step failed.")
return

key = integration.metadata["api_key"]
team_table = []
Expand Down
13 changes: 13 additions & 0 deletions src/sentry/integrations/opsgenie/spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from sentry.models.notificationaction import ActionService

PROVIDER = "opsgenie"


class OpsgenieOnCallSpec:
    """Read-only properties identifying the Opsgenie provider."""

    @property
    def action_service(self) -> ActionService:
        """The ``ActionService`` member for Opsgenie."""
        return ActionService.OPSGENIE

    @property
    def provider_slug(self) -> str:
        """The module-level ``PROVIDER`` string."""
        return PROVIDER
Loading