From e2ec5d876e4a10a20e719de64750204518abeb37 Mon Sep 17 00:00:00 2001 From: Andrew Lapp Date: Sun, 15 Sep 2024 15:06:01 -0400 Subject: [PATCH] remove unnecessary tokenizer requirement in models.openai, remove dead code --- outlines/generate/json.py | 17 +-- outlines/models/openai.py | 200 +----------------------------------- pyproject.toml | 1 - tests/models/test_openai.py | 109 ++++++++++---------- 4 files changed, 65 insertions(+), 262 deletions(-) diff --git a/outlines/generate/json.py b/outlines/generate/json.py index 64bee256f..3eb4951e4 100644 --- a/outlines/generate/json.py +++ b/outlines/generate/json.py @@ -80,10 +80,13 @@ def json_openai( if isinstance(schema_object, type(BaseModel)): schema = pyjson.dumps(schema_object.model_json_schema()) + format_sequence = lambda x: schema_object.parse_raw(x) elif callable(schema_object): schema = pyjson.dumps(get_schema_from_signature(schema_object)) + format_sequence = lambda x: pyjson.loads(x) elif isinstance(schema_object, str): schema = schema_object + format_sequence = lambda x: pyjson.loads(x) else: raise ValueError( f"Cannot parse schema {schema_object}. The schema must be either " @@ -103,18 +106,6 @@ def json_openai( } ) - # set generators sequence post-processor - if isinstance(schema_object, type(BaseModel)): - generator.format_sequence = lambda x: schema_object.parse_raw(x) - elif callable(schema_object): - generator.format_sequence = lambda x: pyjson.loads(x) - elif isinstance(schema_object, str) or isinstance(schema_object, dict): - generator.format_sequence = lambda x: pyjson.loads(x) - else: - raise ValueError( - f"Cannot parse schema {schema_object}. The schema must be either " - + "a Pydantic object, a function or a string that contains the JSON " - + "Schema specification" - ) + generator.format_sequence = format_sequence return generator diff --git a/outlines/models/openai.py b/outlines/models/openai.py index 9d59c1d84..89c26f217 100644 --- a/outlines/models/openai.py +++ b/outlines/models/openai.py @@ -1,10 +1,8 @@ """Integration with OpenAI's API.""" import copy import functools -import warnings from dataclasses import asdict, dataclass, field, replace -from itertools import zip_longest -from typing import Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -75,7 +73,6 @@ def __init__( self, client, config, - tokenizer=None, system_prompt: Optional[str] = None, ): """Create an `OpenAI` instance. @@ -90,12 +87,9 @@ def __init__( config An instance of `OpenAIConfig`. Can be useful to specify some parameters that cannot be set by calling this class' methods. - tokenizer - The tokenizer associated with the model the client connects to. """ self.client = client - self.tokenizer = tokenizer self.config = config # We count the total number of prompt and generated tokens as returned @@ -161,97 +155,6 @@ def stream(self, *args, **kwargs): "Streaming is currently not supported for the OpenAI API" ) - def generate_choice( - self, - prompt: str, - choices: List[str], - max_tokens: Optional[int] = None, - system_prompt: Optional[str] = None, - ) -> str: - """Call the OpenAI API to generate one of several choices. - - Parameters - ---------- - prompt - A string or list of strings that will be used to prompt the model - choices - The list of strings between which we ask the model to choose - max_tokens - The maximum number of tokens to generate - system_prompt - The content of the system message that precedes the user's prompt. 
- - """ - if self.tokenizer is None: - raise ValueError( - "You must initialize the `OpenAI` class with a tokenizer to use `outlines.generate.choice`" - ) - - config = replace(self.config, max_tokens=max_tokens) - - greedy = False - decoded: List[str] = [] - encoded_choices_left: List[List[int]] = [ - self.tokenizer.encode(word) for word in choices - ] - - while len(encoded_choices_left) > 0: - max_tokens_left = max([len(tokens) for tokens in encoded_choices_left]) - transposed_choices_left: List[Set] = [ - {item for item in subset if item is not None} - for subset in zip_longest(*encoded_choices_left) - ] - - if not greedy: - mask = build_optimistic_mask(transposed_choices_left) - else: - mask = {} - for token in transposed_choices_left[0]: # build greedy mask - mask[token] = 100 - - if len(mask) == 0: - break - - config = replace(config, logit_bias=mask, max_tokens=max_tokens_left) - - response, prompt_tokens, completion_tokens = generate_chat( - prompt, system_prompt, self.client, config - ) - self.prompt_tokens += prompt_tokens - self.completion_tokens += completion_tokens - - encoded_response = self.tokenizer.encode(response) - - if encoded_response in encoded_choices_left: - decoded.append(response) - break - else: - ( - encoded_response, - encoded_choices_left, - ) = find_response_choices_intersection( - encoded_response, encoded_choices_left - ) - - if len(encoded_response) == 0: - greedy = True # next iteration will be "greedy" - continue - else: - decoded.append("".join(self.tokenizer.decode(encoded_response))) - - if len(encoded_choices_left) == 1: # only one choice left - choice_left = self.tokenizer.decode(encoded_choices_left[0]) - decoded.append(choice_left) - break - - greedy = False # after each success, stay with (or switch to) "optimistic" approach - - prompt = prompt + "".join(decoded) - - choice = "".join(decoded) - - return choice - def new_with_replacements(self, **kwargs): new_instance = copy.copy(self) new_instance.config = replace(new_instance.config, **kwargs) @@ -316,81 +219,6 @@ async def call_api(prompt, system_prompt, config): return results, usage["prompt_tokens"], usage["completion_tokens"] -def find_longest_intersection(response: List[int], choice: List[int]) -> List[int]: - """Find the longest intersection between the response and the choice.""" - for i, (token_r, token_c) in enumerate(zip_longest(response, choice)): - if token_r != token_c: - return response[:i] - - return response - - -def find_response_choices_intersection( - response: List[int], choices: List[List[int]] -) -> Tuple[List[int], List[List[int]]]: - """Find the longest intersection between the response and the different - choices. - - Say the response is of the form `[1, 2, 3, 4, 5]` and we have the choices - `[[1, 2], [1, 2, 3], [6, 7, 8]` then the function will return `[1, 2, 3]` as the - intersection, and `[[]]` as the list of choices left. - - Parameters - ---------- - response - The model's response - choices - The remaining possible choices - - Returns - ------- - A tuple that contains the longest intersection between the response and the - different choices, and the choices which start with this intersection, with the - intersection removed. - - """ - max_len_prefix = 0 - choices_left = [] - longest_prefix = [] - for i, choice in enumerate(choices): - # Find the longest intersection between the response and the choice. 
- prefix = find_longest_intersection(response, choice) - - if len(prefix) > max_len_prefix: - max_len_prefix = len(prefix) - choices_left = [choice[len(prefix) :]] - longest_prefix = prefix - - elif len(prefix) == max_len_prefix: - choices_left.append(choice[len(prefix) :]) - - return longest_prefix, choices_left - - -def build_optimistic_mask( - transposed: List[Set[int]], max_mask_size: int = 300 -) -> Dict[int, int]: - """We build the largest mask possible. - - Tokens are added from left to right, so if the encoded choices are e.g. - `[[1,2], [3,4]]`, `1` and `3` will be added before `2` and `4`. - - Parameters - ---------- - transposed - A list of lists that contain the nth token of each choice. - - """ - mask: Dict[int, int] = {} - for tokens in transposed: - for token in tokens: - if len(mask) == max_mask_size: - return mask - mask[token] = 100 - - return mask - - def error_handler(api_call_fn: Callable) -> Callable: """Handle OpenAI API errors and missing API key.""" @@ -430,11 +258,10 @@ def openai_model( **openai_client_params, ): try: - import tiktoken from openai import AsyncOpenAI except ImportError: raise ImportError( - "The `openai` and `tiktoken` libraries needs to be installed in order to use Outlines' OpenAI integration." + "The `openai` library needs to be installed in order to use Outlines' OpenAI integration." ) if config is not None: @@ -444,15 +271,7 @@ def openai_model( client = AsyncOpenAI(**openai_client_params) - try: - tokenizer = tiktoken.encoding_for_model(model_name) - except KeyError: - warnings.warn( - f"Could not find a tokenizer for model {model_name}. Using default cl100k_base." - ) - tokenizer = tiktoken.get_encoding("cl100k_base") - - return OpenAI(client, config, tokenizer) + return OpenAI(client, config) def azure_openai( @@ -462,11 +281,10 @@ def azure_openai( **azure_openai_client_params, ): try: - import tiktoken from openai import AsyncAzureOpenAI except ImportError: raise ImportError( - "The `openai` and `tiktoken` libraries needs to be installed in order to use Outlines' Azure OpenAI integration." + "The `openai` library needs to be installed in order to use Outlines' Azure OpenAI integration." ) if config is not None: @@ -476,12 +294,4 @@ def azure_openai( client = AsyncAzureOpenAI(**azure_openai_client_params) - try: - tokenizer = tiktoken.encoding_for_model(model_name or deployment_name) - except KeyError: - warnings.warn( - f"Could not find a tokenizer for model {model_name or deployment_name}. Using default cl100k_base." 
- ) - tokenizer = tiktoken.get_encoding("cl100k_base") - - return OpenAI(client, config, tokenizer) + return OpenAI(client, config) diff --git a/pyproject.toml b/pyproject.toml index dfda81acd..8e9e7e581 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,7 +122,6 @@ module = [ "pydantic.*", "pytest", "referencing.*", - "tiktoken.*", "torch.*", "transformers.*", "llama_cpp", diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py index 6c85963ef..f4b97f36a 100644 --- a/tests/models/test_openai.py +++ b/tests/models/test_openai.py @@ -1,4 +1,6 @@ import importlib +import json +from contextlib import contextmanager from unittest import mock from unittest.mock import MagicMock @@ -6,13 +8,7 @@ from openai import AsyncOpenAI from outlines import generate -from outlines.models.openai import ( - OpenAI, - OpenAIConfig, - build_optimistic_mask, - find_longest_intersection, - find_response_choices_intersection, -) +from outlines.models.openai import OpenAI, OpenAIConfig def module_patch(path): @@ -63,60 +59,67 @@ def test_openai_call(): assert mocked_generate_chat_arg_config.n == 3 -def test_openai_choice_call(): +@contextmanager +def patched_openai(completion, **oai_config): + """Create a patched openai whose chat completions always returns `completion`""" with module_patch("outlines.models.openai.generate_chat") as mocked_generate_chat: - mocked_generate_chat.return_value = '{"result": "foo"}', 1, 2 + mocked_generate_chat.return_value = completion, 1, 2 async_client = MagicMock(spec=AsyncOpenAI, api_key="key") - model = OpenAI( async_client, OpenAIConfig(max_tokens=10, temperature=0.5, n=2, stop=["."]), ) + yield model - # doesn't actually test endpoint or behavior, - # just integration between generate.choice and models.openai + +def test_openai_choice_call(): + with patched_openai(completion='{"result": "foo"}') as model: generator = generate.choice(model, ["foo", "bar"]) assert generator("hi") == "foo" -@pytest.mark.parametrize( - "response,choice,expected_intersection,expected_choices_left", - ( - ([1, 2, 3, 4], [[5, 6]], [], [[5, 6]]), - ([1, 2, 3, 4], [[5, 6], [7, 8]], [], [[5, 6], [7, 8]]), - ([1, 2, 3, 4], [[1, 2], [7, 8]], [1, 2], [[]]), - ([1, 2], [[1, 2, 3, 4], [1, 2]], [1, 2], [[3, 4], []]), - ([1, 2, 3], [[1, 2, 3, 4], [1, 2]], [1, 2, 3], [[4]]), - ), -) -def test_find_response_choices_intersection( - response, choice, expected_intersection, expected_choices_left -): - intersection, choices_left = find_response_choices_intersection(response, choice) - assert intersection == expected_intersection - assert choices_left == expected_choices_left - - -@pytest.mark.parametrize( - "response,choice,expected_prefix", - ( - ([1, 2, 3], [1, 2, 3, 4], [1, 2, 3]), - ([1, 2, 3], [1, 2, 3], [1, 2, 3]), - ([4, 5], [1, 2, 3], []), - ), -) -def test_find_longest_common_prefix(response, choice, expected_prefix): - prefix = find_longest_intersection(response, choice) - assert prefix == expected_prefix - - -@pytest.mark.parametrize( - "transposed,mask_size,expected_mask", - ( - ([{1, 2}, {3, 4}], 3, {1: 100, 2: 100, 3: 100}), - ([{1, 2}, {3, 4}], 4, {1: 100, 2: 100, 3: 100, 4: 100}), - ), -) -def test_build_optimistic_mask(transposed, mask_size, expected_mask): - mask = build_optimistic_mask(transposed, mask_size) - assert mask == expected_mask +def test_openai_choice_call_invalid_server_response(): + with patched_openai(completion="not actual json") as model: + generator = generate.choice(model, ["foo", "bar"]) + with pytest.raises(json.decoder.JSONDecodeError): + generator("hi") + 
+ +def test_openai_json_call_pydantic(): + from pydantic import BaseModel, ConfigDict, ValidationError + + class Person(BaseModel): + model_config = ConfigDict(extra="forbid") # required for openai + first_name: str + last_name: str + age: int + + completion = '{"first_name": "Usain", "last_name": "Bolt", "age": 38}' + + # assert success for valid response + with patched_openai(completion=completion) as model: + generator = generate.json(model, Person) + assert generator("fastest person") == Person.parse_raw(completion) + + # assert fail for non-json response + with patched_openai(completion="usain bolt") as model: + generator = generate.json(model, Person) + with pytest.raises(ValidationError): + assert generator("fastest person") + + +def test_openai_json_call_str(): + person_schema = '{"additionalProperties": false, "properties": {"first_name": {"title": "First Name", "type": "string"}, "last_name": {"title": "Last Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["first_name", "last_name", "age"], "title": "Person", "type": "object"}' + + output = {"first_name": "Usain", "last_name": "Bolt", "age": 38} + + # assert success for valid response + with patched_openai(completion=json.dumps(output)) as model: + generator = generate.json(model, person_schema) + assert generator("fastest person") == output + + # assert fail for non-json response + with patched_openai(completion="usain bolt") as model: + generator = generate.json(model, person_schema) + with pytest.raises(json.decoder.JSONDecodeError): + assert generator("fastest person")
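
Note below the patch, not part of the diff: with the tokenizer argument removed, an `OpenAI` model is now built from an async client plus an `OpenAIConfig` alone, and `generate.json` attaches `format_sequence` in the same branch that builds the schema. A minimal end-to-end sketch under those assumptions; the "gpt-4o-mini" model name, the `model=` config field, and a real `OPENAI_API_KEY` in the environment are illustrative and not taken from the patch:

    from openai import AsyncOpenAI
    from pydantic import BaseModel, ConfigDict

    from outlines import generate
    from outlines.models.openai import OpenAI, OpenAIConfig

    class Person(BaseModel):
        model_config = ConfigDict(extra="forbid")  # OpenAI structured outputs reject extra keys
        first_name: str
        last_name: str
        age: int

    # No tiktoken tokenizer is needed any more: a client and a config are enough.
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment (assumed here)
    model = OpenAI(client, OpenAIConfig(model="gpt-4o-mini", max_tokens=100))

    # generate.json parses the completion with the Pydantic model, so the result
    # is a Person instance rather than a raw JSON string.
    generator = generate.json(model, Person)
    person = generator("Who is the fastest sprinter in history?")

The same model object also works with `generate.choice`, which, as the updated choice tests above suggest, now goes through the JSON response path (a completion like '{"result": "foo"}') instead of the removed logit-bias masking helpers.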