diff --git a/plugins/openai_plugin.py b/plugins/openai_plugin.py index 4d86b965..64e9b0e9 100644 --- a/plugins/openai_plugin.py +++ b/plugins/openai_plugin.py @@ -137,6 +137,9 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float "max_tokens": query["output_tokens"], "temperature": 0.1, "stream": True, + "stream_options": { + "include_usage": True + } } if "/v1/chat/completions" in self.host: data["messages"] = [ @@ -150,7 +153,7 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float if self.model_name is not None: data["model"] = self.model_name - result = RequestResult(user_id, query.get("input_id"), query.get("input_tokens")) + result = RequestResult(user_id, query.get("input_id")) tokens = [] response = None @@ -189,6 +192,13 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float message["choices"][0]['delta']['content']="" error = message.get("error") if error is None: + # If stream_options.include_usage == True then the final + # message contains only token stats + if not message.get("choices") and message.get('usage'): + result.output_tokens = message["usage"]["completion_tokens"] + result.input_tokens = message["usage"]["prompt_tokens"] + # We don't want to record this message + continue if "/v1/chat/completions" in self.host: token = message["choices"][0]['delta']['content'] else: @@ -230,10 +240,6 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float # Last token comes with finish_reason set. if message.get("choices", [])[0].get("finish_reason", None): - if message.get("usage"): - result.output_tokens = message["usage"]["completion_tokens"] - result.input_tokens = message["usage"]["prompt_tokens"] - result.stop_reason = message["choices"][0]["finish_reason"] # If test duration timeout didn't happen before the last token is received, diff --git a/result.py b/result.py index b320bcfb..eaef37ca 100644 --- a/result.py +++ b/result.py @@ -4,7 +4,7 @@ class RequestResult: """Request result class.""" - def __init__(self, user_id, input_id, input_tokens): + def __init__(self, user_id, input_id, input_tokens=None): """Init method.""" self.user_id = user_id self.input_id = input_id