Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
KuilongCui committed Sep 19, 2024
1 parent 7123f26 commit d7b0102
Showing 1 changed file with 26 additions and 23 deletions.
49 changes: 26 additions & 23 deletions tests/e2e_test/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,29 +75,32 @@ def clear_ray_state():
def parse_log_file():
    """Summarize benchmark latency logs as two markdown tables.

    Scans the current working directory for files whose names end in
    ``_latency_info.json``, reads the first record of each file, and computes
    the 25th/50th/75th/95th/99th percentiles and the mean of the prefill and
    decode latency samples pooled across all files.

    Returns:
        The prefill table followed by the decode table, separated by a blank
        line, each rendered by ``to_markdown_table``.
    """
    json_files = [f for f in os.listdir('.') if f.endswith('_latency_info.json')]

    def get_markdown_data(key: str, head_name: str):
        """Return the header row and value row for the samples stored under *key*."""
        chunks = []
        for json_file in json_files:
            with open(json_file, 'r', encoding="utf-8") as file:
                record = json.load(file)[0]
            # Flatten each file's samples before pooling: files may hold a
            # different number of samples, and a ragged list of lists cannot
            # be turned into a regular array.
            chunks.append(np.ravel(np.asarray(record.get(key, []), dtype=float)))

        samples = np.concatenate(chunks) if chunks else np.empty(0)

        if samples.size:
            stats = [*np.percentile(samples, [25, 50, 75, 95, 99]), np.mean(samples)]
        else:
            # No log files, or none of them carried this key: report NaN
            # rather than failing inside np.percentile on an empty array.
            stats = [float("nan")] * 6

        return [
            [head_name, "p25", "p50", "p75", "p95", "p99", "mean"],
            ["latency(ms)", *(f"{value:.2f}" for value in stats)],
        ]

    decode_data = get_markdown_data('decode_latencies', 'decode')
    # NOTE(review): the key was previously spelled 'prefilll_latencies'; it is
    # assumed to mirror 'decode_latencies' -- confirm against the code that
    # writes the *_latency_info.json files.
    prefill_data = get_markdown_data('prefill_latencies', 'prefill')

    return to_markdown_table(prefill_data) + "\n\n" + to_markdown_table(decode_data)

@pytest.mark.asyncio
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="at least 1 gpus required for simple benchmark")
Expand Down

0 comments on commit d7b0102

Please sign in to comment.