Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics match for PR check #835

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions tests/scripts/remote_monitoring_tests/helpers/kruize.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,16 @@ def create_performance_profile(perf_profile_json_file):
print(response.text)
return response

# Description: This function fetches the metrics exposed by Kruize Autotune through its metrics API
def get_metrics():
    """Send a GET request to the "/metrics" endpoint and return the response.

    The request URL, the response status code and the response text are
    printed so they show up in the test log.
    """
    print("\nGenerating Metrics...")
    metrics_url = URL + "/metrics"
    print("URL = ", metrics_url)

    metrics_response = requests.get(url=metrics_url)

    print("Response status code = ", metrics_response.status_code)
    print("\n************************************************************")
    print(metrics_response.text)
    return metrics_response

81 changes: 81 additions & 0 deletions tests/scripts/remote_monitoring_tests/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,33 @@
INVALID_DEPLOYMENT_TYPE_MSG = "Invalid deployment type: xyz"
INVALID_INTERVAL_DURATION_MSG = "Interval duration cannot be less than or greater than measurement_duration by more than 30 seconds"

# Expected metric lines that match_metrics() asserts are present, written as
# "Metric Category: <API|DB>, Metric Type: <count|sum>, Name: <name>, Value: <float>".
# NOTE(review): the "sum" values appear to be measured timings and may differ
# between runs/systems - confirm before relying on exact matches.
createExperimentCount_STATUS = "Metric Category: API, Metric Type: count, Name: createExperiment, Value: 10.0"
createExperimentSum_STATUS = "Metric Category: API, Metric Type: sum, Name: createExperiment, Value: 0.228229311"
listRecommendationsCount_STATUS = "Metric Category: API, Metric Type: count, Name: listRecommendations, Value: 11.0"
listRecommendationsSum_STATUS = "Metric Category: API, Metric Type: sum, Name: listRecommendations, Value: 0.349422898"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kusumachalasani - I think the count may not change, but the sum can change even for the same number of experiments / results. Have you run this multiple times to check? Should we specify a range instead?

Copy link
Contributor Author

@kusumachalasani kusumachalasani May 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On my system, the sum remained the same. I need to validate whether it is the same on the systems where the PR check is run.
Also, the sum values mentioned here need to be updated after the first run on the system where the PR check is run, considering the resources it has. I will wait for Bharath's fix to validate.
I plan to introduce ranges, but as I mentioned, I want to validate the data on these systems first.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can create the GitHub workflow and run it from your branch by commenting out the failing assertion.

# Expected metric lines for the remaining API metrics and the DB metrics;
# match_metrics() asserts each one is present in the parsed /metrics output.
# NOTE(review): the non-zero "sum" values appear to be measured timings and may
# differ between runs/systems - confirm before relying on exact matches.
listExperimentsCount_STATUS = "Metric Category: API, Metric Type: count, Name: listExperiments, Value: 0.0"
listExperimentsSum_STATUS = "Metric Category: API, Metric Type: sum, Name: listExperiments, Value: 0.0"
updateResultsCount_STATUS = "Metric Category: API, Metric Type: count, Name: updateResults, Value: 1000.0"
updateResultsSum_STATUS = "Metric Category: API, Metric Type: sum, Name: updateResults, Value: 51.24196982"
# NOTE: the next two names spell "count"/"sum" in lowercase, unlike the other
# constants; match_metrics() references them with this same spelling.
addRecommendationToDBcount_STATUS = "Metric Category: DB, Metric Type: count, Name: addRecommendationToDB, Value: 50.0"
addRecommendationToDBsum_STATUS = "Metric Category: DB, Metric Type: sum, Name: addRecommendationToDB, Value: 0.11283727"
addResultsToDBCount_STATUS = "Metric Category: DB, Metric Type: count, Name: addResultsToDB, Value: 1000.0"
addResultsToDBSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: addResultsToDB, Value: 3.513978451"
loadAllRecommendationsCount_STATUS = "Metric Category: DB, Metric Type: count, Name: loadAllRecommendations, Value: 0.0"
loadAllRecommendationsSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: loadAllRecommendations, Value: 0.0"
loadAllExperimentsCount_STATUS = "Metric Category: DB, Metric Type: count, Name: loadAllExperiments, Value: 0.0"
loadAllExperimentsSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: loadAllExperiments, Value: 0.0"
addExperimentToDBCount_STATUS = "Metric Category: DB, Metric Type: count, Name: addExperimentToDB, Value: 10.0"
addExperimentToDBSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: addExperimentToDB, Value: 0.132267294"
loadResultsByExperimentNameCount_STATUS = "Metric Category: DB, Metric Type: count, Name: loadResultsByExperimentName, Value: 1000.0"
loadResultsByExperimentNameSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: loadResultsByExperimentName, Value: 16.355101603"
loadExperimentByNameCount_STATUS = "Metric Category: DB, Metric Type: count, Name: loadExperimentByName, Value: 1031.0"
loadExperimentByNameSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: loadExperimentByName, Value: 1.665818586"
loadAllResultsCount_STATUS = "Metric Category: DB, Metric Type: count, Name: loadAllResults, Value: 0.0"
loadAllResultsSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: loadAllResults, Value: 0.0"
loadRecommendationsByExperimentNameCount_STATUS = "Metric Category: DB, Metric Type: count, Name: loadRecommendationsByExperimentName, Value: 11.0"
loadRecommendationsByExperimentNameSum_STATUS = "Metric Category: DB, Metric Type: sum, Name: loadRecommendationsByExperimentName, Value: 0.090203608"

time_log_csv = "/tmp/time_log.csv"

# DURATION - No. of days * 24.0 hrs
Expand Down Expand Up @@ -372,3 +399,57 @@ def time_diff_in_hours(interval_start_time, interval_end_time):
diff = end_date - start_date
return round(diff.total_seconds() / 3600, 2)

def match_metrics(output_metrics):
    """Parse the Kruize metrics output and assert the expected metrics are present.

    Lines of the form ``kruizeAPI_seconds_(count|sum){...} <value>`` and
    ``kruizeDB_seconds_(count|sum){...} <value>`` are extracted from the
    response body, rendered as
    ``"Metric Category: ..., Metric Type: ..., Name: ..., Value: ..."`` strings
    and compared against the module-level ``*_STATUS`` constants.

    Args:
        output_metrics: response object whose ``content`` attribute holds the
            UTF-8 encoded metrics text (the caller passes the result of
            ``get_metrics()``).

    Raises:
        AssertionError: if any expected metric string is missing.
    """
    api_pattern = re.compile(r'kruizeAPI_seconds_(count|sum){api="(\w+)",application="Kruize",method="(\w+)",}\s+([\d.]+)')
    db_pattern = re.compile(r'kruizeDB_seconds_(count|sum){application="Kruize",method="(\w+)",}\s+([\d.]+)')
    metrics_text = output_metrics.content.decode('utf-8')

    # Normalise both kinds of match to (category, metric type, name, value).
    # The HTTP method captured for API metrics is not part of the compared string.
    parsed_metrics = [
        ('API', metric_type, api_name, float(raw_value))
        for metric_type, api_name, _http_method, raw_value in api_pattern.findall(metrics_text)
    ]
    parsed_metrics.extend(
        ('DB', metric_type, method, float(raw_value))
        for metric_type, method, raw_value in db_pattern.findall(metrics_text)
    )

    # Keep the result list and the per-metric string under different names:
    # the list was previously rebound to a string, so .append() was called on
    # a str and raised AttributeError before any assertion ran.
    metrics = [
        f"Metric Category: {metric_category}, Metric Type: {metric_type}, Name: {name}, Value: {value}"
        for metric_category, metric_type, name, value in parsed_metrics
    ]

    assert createExperimentCount_STATUS in metrics, "createExperimentCount_STATUS assertion failed"
    assert createExperimentSum_STATUS in metrics, "createExperimentSum_STATUS assertion failed"
    assert listRecommendationsCount_STATUS in metrics, "listRecommendationsCount_STATUS assertion failed"
    assert listRecommendationsSum_STATUS in metrics, "listRecommendationsSum_STATUS assertion failed"
    assert listExperimentsCount_STATUS in metrics, "listExperimentsCount_STATUS assertion failed"
    assert listExperimentsSum_STATUS in metrics, "listExperimentsSum_STATUS assertion failed"
    assert updateResultsCount_STATUS in metrics, "updateResultsCount_STATUS assertion failed"
    assert updateResultsSum_STATUS in metrics, "updateResultsSum_STATUS assertion failed"
    assert addRecommendationToDBcount_STATUS in metrics, "addRecommendationToDBcount_STATUS assertion failed"
    assert addRecommendationToDBsum_STATUS in metrics, "addRecommendationToDBsum_STATUS assertion failed"
    assert addResultsToDBCount_STATUS in metrics, "addResultsToDBCount_STATUS assertion failed"
    assert addResultsToDBSum_STATUS in metrics, "addResultsToDBSum_STATUS assertion failed"
    assert loadAllRecommendationsCount_STATUS in metrics, "loadAllRecommendationsCount_STATUS assertion failed"
    assert loadAllRecommendationsSum_STATUS in metrics, "loadAllRecommendationsSum_STATUS assertion failed"
    assert loadAllExperimentsCount_STATUS in metrics, "loadAllExperimentsCount_STATUS assertion failed"
    assert loadAllExperimentsSum_STATUS in metrics, "loadAllExperimentsSum_STATUS assertion failed"
    assert addExperimentToDBCount_STATUS in metrics, "addExperimentToDBCount_STATUS assertion failed"
    assert addExperimentToDBSum_STATUS in metrics, "addExperimentToDBSum_STATUS assertion failed"
    assert loadResultsByExperimentNameCount_STATUS in metrics, "loadResultsByExperimentNameCount_STATUS assertion failed"
    assert loadResultsByExperimentNameSum_STATUS in metrics, "loadResultsByExperimentNameSum_STATUS assertion failed"
    assert loadExperimentByNameCount_STATUS in metrics, "loadExperimentByNameCount_STATUS assertion failed"
    assert loadExperimentByNameSum_STATUS in metrics, "loadExperimentByNameSum_STATUS assertion failed"
    assert loadAllResultsCount_STATUS in metrics, "loadAllResultsCount_STATUS assertion failed"
    assert loadAllResultsSum_STATUS in metrics, "loadAllResultsSum_STATUS assertion failed"
    assert loadRecommendationsByExperimentNameCount_STATUS in metrics, "loadRecommendationsByExperimentNameCount_STATUS assertion failed"
    assert loadRecommendationsByExperimentNameSum_STATUS in metrics, "loadRecommendationsByExperimentNameSum_STATUS assertion failed"

Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ def test_list_recommendations_multiple_exps_from_diff_json_files(cluster_type):
INVALID_EXP_NAME_MSG = "Given experiment name - \" " + experiment_name + " \" is not valid"
assert data['message'] == INVALID_EXP_NAME_MSG, f"expected - {INVALID_EXP_NAME_MSG}, actual - {data['message']}"

# Delete all the experiments
# Validate the metrics for Kruize Autotune
response = get_metrics()
match_metrics(response)

# Delete all the experiments
for i in range(num_exps):
json_file = "/tmp/create_exp_" + str(i) + ".json"
response = delete_experiment(json_file)
Expand Down