From e2ec5d876e4a10a20e719de64750204518abeb37 Mon Sep 17 00:00:00 2001 From: Andrew Lapp Date: Sun, 15 Sep 2024 15:06:01 -0400 Subject: [PATCH] remove unnecessary tokenizer requirement in models.openai, remove dead code --- outlines/generate/json.py | 17 +-- outlines/models/openai.py | 200 +----------------------------------- pyproject.toml | 1 - tests/models/test_openai.py | 109 ++++++++++---------- 4 files changed, 65 insertions(+), 262 deletions(-) diff --git a/outlines/generate/json.py b/outlines/generate/json.py index 64bee256f..3eb4951e4 100644 --- a/outlines/generate/json.py +++ b/outlines/generate/json.py @@ -80,10 +80,13 @@ def json_openai( if isinstance(schema_object, type(BaseModel)): schema = pyjson.dumps(schema_object.model_json_schema()) + format_sequence = lambda x: schema_object.parse_raw(x) elif callable(schema_object): schema = pyjson.dumps(get_schema_from_signature(schema_object)) + format_sequence = lambda x: pyjson.loads(x) elif isinstance(schema_object, str): schema = schema_object + format_sequence = lambda x: pyjson.loads(x) else: raise ValueError( f"Cannot parse schema {schema_object}. The schema must be either " @@ -103,18 +106,6 @@ def json_openai( } ) - # set generators sequence post-processor - if isinstance(schema_object, type(BaseModel)): - generator.format_sequence = lambda x: schema_object.parse_raw(x) - elif callable(schema_object): - generator.format_sequence = lambda x: pyjson.loads(x) - elif isinstance(schema_object, str) or isinstance(schema_object, dict): - generator.format_sequence = lambda x: pyjson.loads(x) - else: - raise ValueError( - f"Cannot parse schema {schema_object}. The schema must be either " - + "a Pydantic object, a function or a string that contains the JSON " - + "Schema specification" - ) + generator.format_sequence = format_sequence return generator diff --git a/outlines/models/openai.py b/outlines/models/openai.py index 9d59c1d84..89c26f217 100644 --- a/outlines/models/openai.py +++ b/outlines/models/openai.py @@ -1,10 +1,8 @@ """Integration with OpenAI's API.""" import copy import functools -import warnings from dataclasses import asdict, dataclass, field, replace -from itertools import zip_longest -from typing import Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -75,7 +73,6 @@ def __init__( self, client, config, - tokenizer=None, system_prompt: Optional[str] = None, ): """Create an `OpenAI` instance. @@ -90,12 +87,9 @@ def __init__( config An instance of `OpenAIConfig`. Can be useful to specify some parameters that cannot be set by calling this class' methods. - tokenizer - The tokenizer associated with the model the client connects to. """ self.client = client - self.tokenizer = tokenizer self.config = config # We count the total number of prompt and generated tokens as returned @@ -161,97 +155,6 @@ def stream(self, *args, **kwargs): "Streaming is currently not supported for the OpenAI API" ) - def generate_choice( - self, - prompt: str, - choices: List[str], - max_tokens: Optional[int] = None, - system_prompt: Optional[str] = None, - ) -> str: - """Call the OpenAI API to generate one of several choices. - - Parameters - ---------- - prompt - A string or list of strings that will be used to prompt the model - choices - The list of strings between which we ask the model to choose - max_tokens - The maximum number of tokens to generate - system_prompt - The content of the system message that precedes the user's prompt. 
- - """ - if self.tokenizer is None: - raise ValueError( - "You must initialize the `OpenAI` class with a tokenizer to use `outlines.generate.choice`" - ) - - config = replace(self.config, max_tokens=max_tokens) - - greedy = False - decoded: List[str] = [] - encoded_choices_left: List[List[int]] = [ - self.tokenizer.encode(word) for word in choices - ] - - while len(encoded_choices_left) > 0: - max_tokens_left = max([len(tokens) for tokens in encoded_choices_left]) - transposed_choices_left: List[Set] = [ - {item for item in subset if item is not None} - for subset in zip_longest(*encoded_choices_left) - ] - - if not greedy: - mask = build_optimistic_mask(transposed_choices_left) - else: - mask = {} - for token in transposed_choices_left[0]: # build greedy mask - mask[token] = 100 - - if len(mask) == 0: - break - - config = replace(config, logit_bias=mask, max_tokens=max_tokens_left) - - response, prompt_tokens, completion_tokens = generate_chat( - prompt, system_prompt, self.client, config - ) - self.prompt_tokens += prompt_tokens - self.completion_tokens += completion_tokens - - encoded_response = self.tokenizer.encode(response) - - if encoded_response in encoded_choices_left: - decoded.append(response) - break - else: - ( - encoded_response, - encoded_choices_left, - ) = find_response_choices_intersection( - encoded_response, encoded_choices_left - ) - - if len(encoded_response) == 0: - greedy = True # next iteration will be "greedy" - continue - else: - decoded.append("".join(self.tokenizer.decode(encoded_response))) - - if len(encoded_choices_left) == 1: # only one choice left - choice_left = self.tokenizer.decode(encoded_choices_left[0]) - decoded.append(choice_left) - break - - greedy = False # after each success, stay with (or switch to) "optimistic" approach - - prompt = prompt + "".join(decoded) - - choice = "".join(decoded) - - return choice - def new_with_replacements(self, **kwargs): new_instance = copy.copy(self) new_instance.config = replace(new_instance.config, **kwargs) @@ -316,81 +219,6 @@ async def call_api(prompt, system_prompt, config): return results, usage["prompt_tokens"], usage["completion_tokens"] -def find_longest_intersection(response: List[int], choice: List[int]) -> List[int]: - """Find the longest intersection between the response and the choice.""" - for i, (token_r, token_c) in enumerate(zip_longest(response, choice)): - if token_r != token_c: - return response[:i] - - return response - - -def find_response_choices_intersection( - response: List[int], choices: List[List[int]] -) -> Tuple[List[int], List[List[int]]]: - """Find the longest intersection between the response and the different - choices. - - Say the response is of the form `[1, 2, 3, 4, 5]` and we have the choices - `[[1, 2], [1, 2, 3], [6, 7, 8]` then the function will return `[1, 2, 3]` as the - intersection, and `[[]]` as the list of choices left. - - Parameters - ---------- - response - The model's response - choices - The remaining possible choices - - Returns - ------- - A tuple that contains the longest intersection between the response and the - different choices, and the choices which start with this intersection, with the - intersection removed. - - """ - max_len_prefix = 0 - choices_left = [] - longest_prefix = [] - for i, choice in enumerate(choices): - # Find the longest intersection between the response and the choice. 
- prefix = find_longest_intersection(response, choice) - - if len(prefix) > max_len_prefix: - max_len_prefix = len(prefix) - choices_left = [choice[len(prefix) :]] - longest_prefix = prefix - - elif len(prefix) == max_len_prefix: - choices_left.append(choice[len(prefix) :]) - - return longest_prefix, choices_left - - -def build_optimistic_mask( - transposed: List[Set[int]], max_mask_size: int = 300 -) -> Dict[int, int]: - """We build the largest mask possible. - - Tokens are added from left to right, so if the encoded choices are e.g. - `[[1,2], [3,4]]`, `1` and `3` will be added before `2` and `4`. - - Parameters - ---------- - transposed - A list of lists that contain the nth token of each choice. - - """ - mask: Dict[int, int] = {} - for tokens in transposed: - for token in tokens: - if len(mask) == max_mask_size: - return mask - mask[token] = 100 - - return mask - - def error_handler(api_call_fn: Callable) -> Callable: """Handle OpenAI API errors and missing API key.""" @@ -430,11 +258,10 @@ def openai_model( **openai_client_params, ): try: - import tiktoken from openai import AsyncOpenAI except ImportError: raise ImportError( - "The `openai` and `tiktoken` libraries needs to be installed in order to use Outlines' OpenAI integration." + "The `openai` library needs to be installed in order to use Outlines' OpenAI integration." ) if config is not None: @@ -444,15 +271,7 @@ def openai_model( client = AsyncOpenAI(**openai_client_params) - try: - tokenizer = tiktoken.encoding_for_model(model_name) - except KeyError: - warnings.warn( - f"Could not find a tokenizer for model {model_name}. Using default cl100k_base." - ) - tokenizer = tiktoken.get_encoding("cl100k_base") - - return OpenAI(client, config, tokenizer) + return OpenAI(client, config) def azure_openai( @@ -462,11 +281,10 @@ def azure_openai( **azure_openai_client_params, ): try: - import tiktoken from openai import AsyncAzureOpenAI except ImportError: raise ImportError( - "The `openai` and `tiktoken` libraries needs to be installed in order to use Outlines' Azure OpenAI integration." + "The `openai` library needs to be installed in order to use Outlines' Azure OpenAI integration." ) if config is not None: @@ -476,12 +294,4 @@ def azure_openai( client = AsyncAzureOpenAI(**azure_openai_client_params) - try: - tokenizer = tiktoken.encoding_for_model(model_name or deployment_name) - except KeyError: - warnings.warn( - f"Could not find a tokenizer for model {model_name or deployment_name}. Using default cl100k_base." 
- ) - tokenizer = tiktoken.get_encoding("cl100k_base") - - return OpenAI(client, config, tokenizer) + return OpenAI(client, config) diff --git a/pyproject.toml b/pyproject.toml index dfda81acd..8e9e7e581 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,7 +122,6 @@ module = [ "pydantic.*", "pytest", "referencing.*", - "tiktoken.*", "torch.*", "transformers.*", "llama_cpp", diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py index 6c85963ef..f4b97f36a 100644 --- a/tests/models/test_openai.py +++ b/tests/models/test_openai.py @@ -1,4 +1,6 @@ import importlib +import json +from contextlib import contextmanager from unittest import mock from unittest.mock import MagicMock @@ -6,13 +8,7 @@ from openai import AsyncOpenAI from outlines import generate -from outlines.models.openai import ( - OpenAI, - OpenAIConfig, - build_optimistic_mask, - find_longest_intersection, - find_response_choices_intersection, -) +from outlines.models.openai import OpenAI, OpenAIConfig def module_patch(path): @@ -63,60 +59,67 @@ def test_openai_call(): assert mocked_generate_chat_arg_config.n == 3 -def test_openai_choice_call(): +@contextmanager +def patched_openai(completion, **oai_config): + """Create a patched openai whose chat completions always returns `completion`""" with module_patch("outlines.models.openai.generate_chat") as mocked_generate_chat: - mocked_generate_chat.return_value = '{"result": "foo"}', 1, 2 + mocked_generate_chat.return_value = completion, 1, 2 async_client = MagicMock(spec=AsyncOpenAI, api_key="key") - model = OpenAI( async_client, OpenAIConfig(max_tokens=10, temperature=0.5, n=2, stop=["."]), ) + yield model - # doesn't actually test endpoint or behavior, - # just integration between generate.choice and models.openai + +def test_openai_choice_call(): + with patched_openai(completion='{"result": "foo"}') as model: generator = generate.choice(model, ["foo", "bar"]) assert generator("hi") == "foo" -@pytest.mark.parametrize( - "response,choice,expected_intersection,expected_choices_left", - ( - ([1, 2, 3, 4], [[5, 6]], [], [[5, 6]]), - ([1, 2, 3, 4], [[5, 6], [7, 8]], [], [[5, 6], [7, 8]]), - ([1, 2, 3, 4], [[1, 2], [7, 8]], [1, 2], [[]]), - ([1, 2], [[1, 2, 3, 4], [1, 2]], [1, 2], [[3, 4], []]), - ([1, 2, 3], [[1, 2, 3, 4], [1, 2]], [1, 2, 3], [[4]]), - ), -) -def test_find_response_choices_intersection( - response, choice, expected_intersection, expected_choices_left -): - intersection, choices_left = find_response_choices_intersection(response, choice) - assert intersection == expected_intersection - assert choices_left == expected_choices_left - - -@pytest.mark.parametrize( - "response,choice,expected_prefix", - ( - ([1, 2, 3], [1, 2, 3, 4], [1, 2, 3]), - ([1, 2, 3], [1, 2, 3], [1, 2, 3]), - ([4, 5], [1, 2, 3], []), - ), -) -def test_find_longest_common_prefix(response, choice, expected_prefix): - prefix = find_longest_intersection(response, choice) - assert prefix == expected_prefix - - -@pytest.mark.parametrize( - "transposed,mask_size,expected_mask", - ( - ([{1, 2}, {3, 4}], 3, {1: 100, 2: 100, 3: 100}), - ([{1, 2}, {3, 4}], 4, {1: 100, 2: 100, 3: 100, 4: 100}), - ), -) -def test_build_optimistic_mask(transposed, mask_size, expected_mask): - mask = build_optimistic_mask(transposed, mask_size) - assert mask == expected_mask +def test_openai_choice_call_invalid_server_response(): + with patched_openai(completion="not actual json") as model: + generator = generate.choice(model, ["foo", "bar"]) + with pytest.raises(json.decoder.JSONDecodeError): + generator("hi") + 
+ +def test_openai_json_call_pydantic(): + from pydantic import BaseModel, ConfigDict, ValidationError + + class Person(BaseModel): + model_config = ConfigDict(extra="forbid") # required for openai + first_name: str + last_name: str + age: int + + completion = '{"first_name": "Usain", "last_name": "Bolt", "age": 38}' + + # assert success for valid response + with patched_openai(completion=completion) as model: + generator = generate.json(model, Person) + assert generator("fastest person") == Person.parse_raw(completion) + + # assert fail for non-json response + with patched_openai(completion="usain bolt") as model: + generator = generate.json(model, Person) + with pytest.raises(ValidationError): + assert generator("fastest person") + + +def test_openai_json_call_str(): + person_schema = '{"additionalProperties": false, "properties": {"first_name": {"title": "First Name", "type": "string"}, "last_name": {"title": "Last Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["first_name", "last_name", "age"], "title": "Person", "type": "object"}' + + output = {"first_name": "Usain", "last_name": "Bolt", "age": 38} + + # assert success for valid response + with patched_openai(completion=json.dumps(output)) as model: + generator = generate.json(model, person_schema) + assert generator("fastest person") == output + + # assert fail for non-json response + with patched_openai(completion="usain bolt") as model: + generator = generate.json(model, person_schema) + with pytest.raises(json.decoder.JSONDecodeError): + assert generator("fastest person")
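
Note below the patch, not part of the diff: with the tokenizer argument removed, an `OpenAI` model is now built from an async client plus an `OpenAIConfig` alone, and `generate.json` attaches `format_sequence` in the same branch that builds the schema. A minimal end-to-end sketch under those assumptions; the "gpt-4o-mini" model name, the `model=` config field, and a real `OPENAI_API_KEY` in the environment are illustrative and not taken from the patch:

    from openai import AsyncOpenAI
    from pydantic import BaseModel, ConfigDict

    from outlines import generate
    from outlines.models.openai import OpenAI, OpenAIConfig

    class Person(BaseModel):
        model_config = ConfigDict(extra="forbid")  # OpenAI structured outputs reject extra keys
        first_name: str
        last_name: str
        age: int

    # No tiktoken tokenizer is needed any more: a client and a config are enough.
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment (assumed here)
    model = OpenAI(client, OpenAIConfig(model="gpt-4o-mini", max_tokens=100))

    # generate.json parses the completion with the Pydantic model, so the result
    # is a Person instance rather than a raw JSON string.
    generator = generate.json(model, Person)
    person = generator("Who is the fastest sprinter in history?")

The same model object also works with `generate.choice`, which, as the updated choice tests above suggest, now goes through the JSON response path (a completion like '{"result": "foo"}') instead of the removed logit-bias masking helpers.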