Skip to content

Commit

Permalink
Remove generator.processors
Browse files Browse the repository at this point in the history
  • Loading branch information
rlouf committed Feb 12, 2024
1 parent e4373e1 commit befd327
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 84 deletions.
3 changes: 1 addition & 2 deletions outlines/generate/cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from outlines.fsm.fsm import CFGFSM
from outlines.generate.api import SequenceGenerator
from outlines.generate.processors import CFGLogitsProcessor
from outlines.models import OpenAI
from outlines.models.llamacpp import LlamaCpp
from outlines.models.llamacpp import CFGLogitsProcessor, LlamaCpp
from outlines.samplers import Sampler, multinomial


Expand Down
77 changes: 0 additions & 77 deletions outlines/generate/processors.py

This file was deleted.

3 changes: 1 addition & 2 deletions outlines/generate/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from outlines.fsm.fsm import RegexFSM
from outlines.generate.api import SequenceGenerator
from outlines.generate.processors import RegexLogitsProcessor
from outlines.models import OpenAI
from outlines.models.llamacpp import LlamaCpp
from outlines.models.llamacpp import LlamaCpp, RegexLogitsProcessor
from outlines.samplers import Sampler, multinomial


Expand Down
77 changes: 74 additions & 3 deletions outlines/models/llamacpp.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import copy
from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union
import math
from typing import Dict, Iterator, List, Optional, Union

import numpy as np
import torch

# TODO: in order to make sub classing work we need to move the installation check here
from llama_cpp import Llama, LogitsProcessorList, StoppingCriteriaList
from numpy.typing import NDArray

if TYPE_CHECKING:
from outlines.generate.processors import LogitsProcessor
from outlines.fsm.fsm import CFGFSM, FSM, FSMState, RegexFSM


class LlamaCpp(Llama):
Expand Down Expand Up @@ -162,3 +164,72 @@ def llamacpp(
model_kwargs: dict = {},
):
return LlamaCpp(model_name, **model_kwargs)


class LogitsProcessor:
    def __init__(self, tokenizer: LlamaCppTokenizer, fsm: FSM):
        """An FSM-based logits processor.

        Parameters
        ----------
        tokenizer
            An instance of `Tokenizer`
        fsm
            An instance of `FSM`

        """
        self.tokenizer = tokenizer
        # Generation always starts in the FSM's initial state.
        self.fsm_state = FSMState(0)
        self.fsm: FSM = fsm
        self.is_first_token = True

    def __call__(
        self, input_ids: NDArray[np.int64], scores: NDArray[np.float32]
    ) -> NDArray[np.float32]:
        """Use the FSM to bias the logits before sampling the next token.

        Parameters
        ----------
        input_ids
            The token ids generated so far; the last entry is the most
            recently sampled token.
        scores
            The model's logits for the next token.

        Returns
        -------
        The logits with every token not allowed by the FSM biased to -inf.

        """
        # On the very first call `input_ids` contains only the prompt, so
        # there is no generated token to advance the FSM with yet.
        if self.is_first_token:
            self.is_first_token = False
        else:
            last_token = input_ids[-1]
            self.fsm_state = self.fsm.next_state(self.fsm_state, last_token)

        allowed_tokens = self.fsm.allowed_token_ids(self.fsm_state)

        # Build the additive mask directly in NumPy with the dtype of
        # `scores`, instead of allocating a torch tensor on the CPU and
        # converting it to a NumPy array.
        mask = np.full(scores.shape[-1], -math.inf, dtype=scores.dtype)
        mask[allowed_tokens] = 0
        biased_scores = scores + mask

        return biased_scores


class RegexLogitsProcessor(LogitsProcessor):
    def __init__(self, regex_string: str, tokenizer: LlamaCppTokenizer):
        """Compile the FSM that drives the regex-guided generation.

        Parameters
        ----------
        regex_string
            A string that represents a regular expression
        tokenizer
            An instance of `Tokenizer`

        """
        # Build the regex-driven FSM and hand it to the generic processor.
        super().__init__(tokenizer, RegexFSM(regex_string, tokenizer))


class CFGLogitsProcessor(LogitsProcessor):
    def __init__(self, cfg_str: str, tokenizer: LlamaCppTokenizer):
        """Compile the FSM that drives the CFG-guided generation.

        Parameters
        ----------
        cfg_str
            A string that represents a grammar
        tokenizer
            An instance of `Tokenizer`

        """
        # Build the grammar-driven FSM and hand it to the generic processor.
        super().__init__(tokenizer, CFGFSM(cfg_str, tokenizer))

0 comments on commit befd327

Please sign in to comment.