diff --git a/outlines/fsm/regex.py b/outlines/fsm/regex.py index a86ddc6fa..ff8dd9131 100644 --- a/outlines/fsm/regex.py +++ b/outlines/fsm/regex.py @@ -785,8 +785,8 @@ def create_fsm_index_end_to_end( re_llama_byte_token = re.compile(r"^<0x[0-9A-F]{2}>$") -# The "▁*" prefix is required to handle Gemma and GPT-SW3 tokenizers, and the "\.*" -# suffix is required to handle the NorwAI tokenizer. +# The "▁*" prefix is required to handle Gemma and GPT-SW3 tokenizers, and the "\.*" +# suffix is required to handle the NorwAI tokenizer. re_replacement_seq = re.compile(r"^▁*�+\.*$") diff --git a/tests/fsm/test_regex.py b/tests/fsm/test_regex.py index 7cc1de0b3..dffa37975 100644 --- a/tests/fsm/test_regex.py +++ b/tests/fsm/test_regex.py @@ -2,7 +2,7 @@ import numba import numpy as np import pytest -from transformers import AutoTokenizer, PreTrainedTokenizerBase +from transformers import AutoTokenizer from outlines.fsm.regex import ( _walk_fsm,