Skip to content

Commit

Permalink
[plugins/openai_plugins] Properly record input_tokens and output_tokens when streaming (#55)
Browse files Browse the repository at this point in the history

* fix: unset input tokens

For a streaming request we set input tokens to a default value at the start of
the function. At the end of the function, if input tokens is not set, we emit
an error and set it to the default. The fix removes the default from the top of
the function.

* feat: collect usage on streaming requests

* style: fix spacing on parameter
  • Loading branch information
sjmonson authored Aug 30, 2024
1 parent 4b8a0e3 commit 3a5d653
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
16 changes: 11 additions & 5 deletions plugins/openai_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float
"max_tokens": query["output_tokens"],
"temperature": 0.1,
"stream": True,
"stream_options": {
"include_usage": True
}
}
if "/v1/chat/completions" in self.host:
data["messages"] = [
Expand All @@ -150,7 +153,7 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float
if self.model_name is not None:
data["model"] = self.model_name

result = RequestResult(user_id, query.get("input_id"), query.get("input_tokens"))
result = RequestResult(user_id, query.get("input_id"))

tokens = []
response = None
Expand Down Expand Up @@ -189,6 +192,13 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float
message["choices"][0]['delta']['content']=""
error = message.get("error")
if error is None:
# If stream_options.include_usage == True then the final
# message contains only token stats
if not message.get("choices") and message.get('usage'):
result.output_tokens = message["usage"]["completion_tokens"]
result.input_tokens = message["usage"]["prompt_tokens"]
# We don't want to record this message
continue
if "/v1/chat/completions" in self.host:
token = message["choices"][0]['delta']['content']
else:
Expand Down Expand Up @@ -230,10 +240,6 @@ def streaming_request_http(self, query: dict, user_id: int, test_end_time: float

# Last token comes with finish_reason set.
if message.get("choices", [])[0].get("finish_reason", None):
if message.get("usage"):
result.output_tokens = message["usage"]["completion_tokens"]
result.input_tokens = message["usage"]["prompt_tokens"]

result.stop_reason = message["choices"][0]["finish_reason"]

# If test duration timeout didn't happen before the last token is received,
Expand Down
2 changes: 1 addition & 1 deletion result.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
class RequestResult:
"""Request result class."""

def __init__(self, user_id, input_id, input_tokens):
def __init__(self, user_id, input_id, input_tokens=None):
"""Init method."""
self.user_id = user_id
self.input_id = input_id
Expand Down

0 comments on commit 3a5d653

Please sign in to comment.