Skip to content

Commit

Permalink
fix(weekly-reports): dupe on report timestamp instead of batch (#76031)
Browse files Browse the repository at this point in the history
- It has been observed that a portion of our duplicate weekly emails
come from the schedule celery task being killed and restarted, which
will cause duplicates for any orgs that were queued in the first batch.
- this also generates a new batch_id, which causes our de-duplication
logic to not work, as batch_id is included in the redis key that we use.
- [x] This PR modifies our logic so that the dupe checker instead uses
the timestamp for which the weekly report is generated on. this should
be a safe way to ensure no duplicates occur across batches.
- [x] Updates the test.
  • Loading branch information
JoshFerge authored Aug 13, 2024
1 parent 8505e05 commit 3c02b74
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
17 changes: 13 additions & 4 deletions src/sentry/tasks/summaries/weekly_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import uuid
from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from datetime import timedelta
from datetime import datetime, timedelta
from functools import partial
from typing import Any, cast

Expand Down Expand Up @@ -293,7 +293,7 @@ def _send_to_user(self, user_template_context: Mapping[str, Any]) -> None:
template_context: Mapping[str, Any] | None = user_template_context.get("context")
user_id: int | None = user_template_context.get("user_id")
if template_context and user_id:
dupe_check = _DuplicateDeliveryCheck(self, user_id)
dupe_check = _DuplicateDeliveryCheck(self, user_id, self.ctx.timestamp)
if not dupe_check.check_for_duplicate_delivery():
self.send_email(template_ctx=template_context, user_id=user_id)
dupe_check.record_delivery()
Expand Down Expand Up @@ -337,9 +337,13 @@ def send_email(self, template_ctx: Mapping[str, Any], user_id: int) -> None:


class _DuplicateDeliveryCheck:
def __init__(self, batch: OrganizationReportBatch, user_id: int):
def __init__(self, batch: OrganizationReportBatch, user_id: int, timestamp: float):
self.batch = batch
self.user_id = user_id
# note that if the timestamps between batches cross a UTC day boundary,
# this will not work correctly. but we always start reports at midnight UTC,
# so that is unlikely to be an issue.
self.report_date = datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d")

# Tracks state from `check_for_duplicate_delivery` to `record_delivery`
self.count: int | None = None
Expand All @@ -349,7 +353,11 @@ def _get_redis_cluster(self) -> LocalClient:

@property
def _redis_name(self) -> str:
name_parts = (self.batch.batch_id, self.batch.ctx.organization.id, self.user_id)
name_parts = (
self.report_date,
self.batch.ctx.organization.id,
self.user_id,
)
return ":".join(str(part) for part in name_parts)

def _get_log_extras(self) -> dict[str, Any]:
Expand All @@ -358,6 +366,7 @@ def _get_log_extras(self) -> dict[str, Any]:
"organization": self.batch.ctx.organization.id,
"user_id": self.user_id,
"has_email_override": bool(self.batch.email_override),
"report_date": self.report_date,
}

def check_for_duplicate_delivery(self) -> bool:
Expand Down
10 changes: 6 additions & 4 deletions tests/sentry/tasks/test_weekly_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,24 +1011,26 @@ def test_duplicate_detection(self, mock_send_email, mock_prepare_template_contex
ctx = OrganizationReportContext(0, 0, self.organization)
template_context = prepare_template_context(ctx, [self.user.id])
mock_prepare_template_context.return_value = template_context
batch_id = UUID("abe8ba3e-90af-4a98-b925-5f30250ae6a0")
batch1_id = UUID("abe8ba3e-90af-4a98-b925-5f30250ae6a0")
batch2_id = UUID("abe8ba3e-90af-4a98-b925-5f30250ae6a1")
self._set_option_value("always")

# First send
OrganizationReportBatch(ctx, batch_id).deliver_reports()
OrganizationReportBatch(ctx, batch1_id).deliver_reports()
assert mock_send_email.call_count == 1
mock_logger.error.assert_not_called()

# Duplicate send
OrganizationReportBatch(ctx, batch_id).deliver_reports()
OrganizationReportBatch(ctx, batch2_id).deliver_reports()
assert mock_send_email.call_count == 1
assert mock_logger.error.call_count == 1
mock_logger.error.assert_called_once_with(
"weekly_report.delivery_record.duplicate_detected",
extra={
"batch_id": str(batch_id),
"batch_id": str(batch2_id),
"organization": self.organization.id,
"user_id": self.user.id,
"has_email_override": False,
"report_date": "1970-01-01",
},
)

0 comments on commit 3c02b74

Please sign in to comment.