Skip to content

Commit

Permalink
Add another missing walk_fsm condition
Browse files (browse the repository at this point in the history)
The full-match option was handled incorrectly for scanned/walked strings whose
transitions are all valid but which do not end in a final state.
  • Loading branch information
brandonwillard committed Oct 20, 2023
1 parent 3cf3f96 commit 95a9d97
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 8 deletions.
9 changes: 8 additions & 1 deletion outlines/text/fsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ def _walk_fsm(

accepted_states.append(_nonoptional(state))

if full_match and last_final_idx - 1 != i:
return numba.typed.List.empty_list(numba.int64)

return accepted_states


Expand Down Expand Up @@ -305,6 +308,9 @@ def walk_fsm(

accepted_states.append(state)

if full_match and last_final_idx - 1 != i:
return []

return accepted_states


Expand Down Expand Up @@ -376,7 +382,7 @@ def process_token_string(
res = set()
vocab_string_len = len(token)

for end_idx, state_seq in find_partial_matches(fsm_info, token):
for end_idx, state_seq in find_partial_matches(fsm_info, token, full_match=False):
if end_idx is not None and end_idx < vocab_string_len - 1:
continue

Expand Down Expand Up @@ -603,6 +609,7 @@ def state_scan_tokens(
fsm_finals,
token,
start_state,
False,
)

if state_seq is not None and len(state_seq) < len(token):
Expand Down
26 changes: 19 additions & 7 deletions tests/text/test_fsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,18 @@ def test_walk_fsm(function):
res = tuple(function(regex_fsm, "0", 1, full_match=True))
assert res == tuple()

regex_pattern = interegular.parse_pattern("0|[1-9][2-9]+")
regex_fsm, _ = make_deterministic_fsm(regex_pattern.to_fsm().reduce())

res = tuple(function(regex_fsm, "1", regex_fsm.initial, full_match=True))
assert res == tuple()

res = tuple(function(regex_fsm, "1", regex_fsm.initial, full_match=False))
assert res == (2,)

res = tuple(function(regex_fsm, "12", regex_fsm.initial, full_match=True))
assert res == (2, 3)

pattern = interegular.parse_pattern(r"(?:[^\W\d]\w*|[\t \x0c]+)")
fsm, _ = make_deterministic_fsm(pattern.to_fsm().reduce())

Expand Down Expand Up @@ -90,19 +102,19 @@ def to_python(res):

res = to_python(find_partial_matches(def_fsm, "def"))
assert res == {(2, (0, 1, 2, 3))}
res = to_python(find_partial_matches(def_fsm, "de"))
res = to_python(find_partial_matches(def_fsm, "de", full_match=False))
assert res == {(1, (0, 1, 2))}
res = to_python(find_partial_matches(def_fsm, "d"))
res = to_python(find_partial_matches(def_fsm, "d", full_match=False))
assert res == {(0, (0, 1))}
res = to_python(find_partial_matches(def_fsm, ""))
assert res == set()
res = to_python(find_partial_matches(def_fsm, "df"))
assert res == set()
res = to_python(find_partial_matches(def_fsm, "ef"))
res = to_python(find_partial_matches(def_fsm, "ef", full_match=False))
assert res == {(1, (1, 2, 3))}
res = to_python(find_partial_matches(def_fsm, "e"))
res = to_python(find_partial_matches(def_fsm, "e", full_match=False))
assert res == {(0, (1, 2))}
res = to_python(find_partial_matches(def_fsm, "f"))
res = to_python(find_partial_matches(def_fsm, "f", full_match=False))
assert res == {(0, (2, 3))}
res = to_python(find_partial_matches(def_fsm, "ef foo", full_match=False))
assert res == {(1, (1, 2, 3))}
Expand All @@ -112,7 +124,7 @@ def to_python(res):
assert res == {(2, (0, 1, 2, 3))}

# `NAME` can have multiple start states for this input
res = to_python(find_partial_matches(name_fsm, "d"))
res = to_python(find_partial_matches(name_fsm, "d", full_match=False))
assert res == {(0, (0, 1)), (0, (1, 1))}
# Not this case
res = to_python(find_partial_matches(name_fsm, "1d"))
Expand All @@ -133,7 +145,7 @@ def to_python(res):

float_fsm = float_fsm.fsm_info

res = to_python(find_partial_matches(float_fsm, "."))
res = to_python(find_partial_matches(float_fsm, ".", full_match=False))
assert res == {(0, (3, 5)), (0, (4, 5)), (0, (0, 2))}

joins_fsm, _ = make_deterministic_fsm(
Expand Down

0 comments on commit 95a9d97

Please sign in to comment.