Add retries to result uploading (dotnet#2970)
* Wrapped the blob upload and queue send-message calls in a retrying try/except loop that attempts each action up to 3 times, with 5 seconds between attempts (sketched below).
* Moved the retry logic into the shared performance common.py so it can be reused anywhere a retry on exception is warranted. Updated the upload and queue steps in upload.py to use the new function, and changed upload to record a failure and keep going instead of raising at the first failure. This ensures we still collect as much data as possible regardless of whether the upload flow is broken.
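
A minimal sketch of that pattern, using the retry_on_exception helper added to scripts/performance/common.py in the diff below; the do_upload callable here is a hypothetical stand-in for the blob upload or queue send call:

    from performance.common import retry_on_exception

    def do_upload():
        # Hypothetical stand-in for an action that can fail transiently,
        # e.g. an HTTP call to blob storage.
        ...

    # Attempt the action up to 3 times with 5 seconds between attempts
    # (the helper's defaults); the final exception is re-raised to the caller.
    retry_on_exception(do_upload, retry_count=3, retry_delay=5)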
LoopedBard3 committed Apr 25, 2023
1 parent 9e21af2 commit b5e854a
Showing 2 changed files with 54 additions and 9 deletions.
28 changes: 28 additions & 0 deletions scripts/performance/common.py
@@ -17,6 +17,7 @@
 
 import os
 import sys
+import time
 
 
 def get_machine_architecture():
@@ -157,6 +158,33 @@ def push_dir(path: str = None) -> None:
     else:
         yield
 
+def retry_on_exception(function, retry_count=3, retry_delay=5, retry_delay_multiplier=1, retry_on_exception=Exception):
+    '''
+    Retries the specified function if it throws an exception.
+    :param function: The function to execute.
+    :param retry_count: The number of times to retry the function.
+    :param retry_delay: The delay between retries (seconds).
+    :param retry_delay_multiplier: The multiplier to apply to the retry delay after failure.
+    :param retry_on_exception: The exception to retry on (Defaults to Exception).
+    '''
+    if retry_count < 0:
+        raise ValueError('retry_count must be >= 0')
+    if retry_delay < 0:
+        raise ValueError('retry_delay must be >= 0')
+    if retry_delay_multiplier < 1:
+        raise ValueError('retry_delay_multiplier must be >= 1')
+
+    for i in range(retry_count):
+        try:
+            return function()
+        except retry_on_exception as e:
+            if i == retry_count - 1:
+                raise
+            getLogger().info('Exception caught: %s', e)
+            getLogger().info('Retrying in %d seconds...', retry_delay)
+            time.sleep(retry_delay)
+            retry_delay *= retry_delay_multiplier
+
 class RunCommand:
     '''
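
For reference, a usage sketch of the new helper showing the delay multiplier (assuming getLogger is already imported elsewhere in common.py, since only the time import appears in this hunk); flaky_call is a hypothetical callable:

    from performance.common import retry_on_exception

    def flaky_call():
        # Hypothetical action that sometimes raises a transient error.
        ...

    # Up to 4 attempts with waits of 5, 10 and 20 seconds between them
    # (retry_delay doubles after each failure); the last exception is re-raised.
    retry_on_exception(flaky_call, retry_count=4, retry_delay=5, retry_delay_multiplier=2)

    # Only retry when a specific exception type is raised; any other exception
    # propagates out of the first attempt immediately.
    retry_on_exception(flaky_call, retry_on_exception=ConnectionError)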
35 changes: 26 additions & 9 deletions scripts/upload.py
@@ -2,7 +2,9 @@
 from azure.storage.queue import QueueClient, TextBase64EncodePolicy
 from traceback import format_exc
 from glob import glob
+from performance.common import retry_on_exception
 import os
+import time
 
 from logging import getLogger
 
@@ -22,23 +24,38 @@ def upload(globpath, container, queue, sas_token_env, storage_account_uri):
             return 1
 
         files = glob(globpath, recursive=True)
 
+        any_upload_or_queue_failed = False
         for infile in files:
             blob_name = get_unique_name(infile, os.getenv('HELIX_WORKITEM_ID'))
 
             getLogger().info("uploading {}".format(infile))
 
             blob_client = BlobClient(account_url=storage_account_uri.format('blob'), container_name=container, blob_name=blob_name, credential=sas_token)
 
+            upload_succeded = False
             with open(infile, "rb") as data:
-                blob_client.upload_blob(data, blob_type="BlockBlob", content_settings=ContentSettings(content_type="application/json"))
-
-            if queue is not None:
-                queue_client = QueueClient(account_url=storage_account_uri.format('queue'), queue_name=queue, credential=sas_token, message_encode_policy=TextBase64EncodePolicy())
-                queue_client.send_message(blob_client.url)
-
-        getLogger().info("upload complete")
-        return 0
+                try:
+                    retry_on_exception(lambda: blob_client.upload_blob(data, blob_type="BlockBlob", content_settings=ContentSettings(content_type="application/json")))
+                    upload_succeded = True
+                except Exception as ex:
+                    any_upload_or_queue_failed = True
+                    getLogger().error("upload failed")
+                    getLogger().error('{0}: {1}'.format(type(ex), str(ex)))
+
+            if upload_succeded:
+                if queue is not None:
+                    try:
+                        queue_client = QueueClient(account_url=storage_account_uri.format('queue'), queue_name=queue, credential=sas_token, message_encode_policy=TextBase64EncodePolicy())
+                        retry_on_exception(lambda: queue_client.send_message(blob_client.url))
+                        getLogger().info("upload and queue complete")
+                    except Exception as ex:
+                        any_upload_or_queue_failed = True
+                        getLogger().error("queue failed")
+                        getLogger().error('{0}: {1}'.format(type(ex), str(ex)))
+                else:
+                    getLogger().info("upload complete")
+
+        return any_upload_or_queue_failed # 0 (False) if all uploads and queues succeeded, 1 (True) otherwise
 
     except Exception as ex:
         getLogger().error('{0}: {1}'.format(type(ex), str(ex)))
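
One note on the new return value: bool is a subclass of int in Python, which is why returning the flag directly still matches the 0/1 convention described in the comment. A minimal sketch of a caller mapping it to a process exit code (the actual entry point is not part of this diff, so this is only an assumption about how the result is consumed):

    import sys

    def upload_example() -> bool:
        # Hypothetical stand-in for upload(); True means at least one
        # upload or queue step failed, False means everything succeeded.
        return False

    if __name__ == "__main__":
        # False -> 0 (success), True -> 1 (failure).
        sys.exit(int(upload_example()))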
