Skip to content

Commit

Permalink
Fix defect where duplicate data is saved if one metric requires splitting up into multiple log lines (#102)
Browse files Browse the repository at this point in the history

Co-authored-by: reecepeg <reecepeg@amazon.com>
  • Loading branch information
liquidpele and reecepeg authored May 18, 2023
1 parent f1db319 commit f44aaf2
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
8 changes: 7 additions & 1 deletion aws_embedded_metrics/serializers/log_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,19 @@ def create_body() -> Dict[str, Any]:

# Track batch number to know where to slice metric data
i = 0

complete_metrics = set()
while remaining_data:
remaining_data = False
current_body = create_body()

for metric_name, metric in context.metrics.items():
# ensure we don't add duplicates of metrics we already completed
if metric_name in complete_metrics:
continue

if len(metric.values) == 1:
current_body[metric_name] = metric.values[0]
complete_metrics.add(metric_name)
else:
# Slice metric data as each batch cannot contain more than
# MAX_DATAPOINTS_PER_METRIC entries for a given metric
Expand All @@ -87,6 +91,8 @@ def create_body() -> Dict[str, Any]:
# of the metric value list
if len(metric.values) > end_index:
remaining_data = True
else:
complete_metrics.add(metric_name)

metric_body = {"Name": metric_name, "Unit": metric.unit}
if metric.storage_resolution == StorageResolution.HIGH:
Expand Down
27 changes: 27 additions & 0 deletions tests/serializer/test_log_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,33 @@ def test_serialize_with_more_than_100_metrics_and_datapoints():
assert metric_results == expected_results


def test_serialize_no_duplication_bug():
    """
    Regression test for a defect where a metric with many values forced the
    output to be split across several log batches, and every single-value
    metric was duplicated into each of those batches.

    Verifies the fix: a single-value metric appears exactly once across all
    serialized batches.
    """
    context = get_context()
    expected_total = 1

    # One metric with a single value — must be emitted exactly once.
    single_key = "Metric-single"
    context.put_metric(single_key, expected_total)

    # Enough values on a second metric to force the serializer to break
    # the output into multiple log batches.
    for _ in range(1000):
        context.put_metric("Metric-many", 0)

    results = serializer.serialize(context)

    # Sum every occurrence of the single metric across all batches;
    # any duplication would push the total above the expected value.
    found_total = sum(
        value
        for batch in results
        for metric_key, value in json.loads(batch).items()
        if metric_key == single_key
    )

    assert found_total == expected_total


def test_serialize_with_multiple_metrics():
# arrange
metrics = 2
Expand Down

0 comments on commit f44aaf2

Please sign in to comment.