Skip to content

Commit

Permalink
more edge cases for filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
vicilliar committed Jul 13, 2023
1 parent 119b3a1 commit 333e0c0
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 25 deletions.
3 changes: 3 additions & 0 deletions src/marqo/tensor_search/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ def contextualise_user_filter(filter_string: Optional[str], simple_properties: t
"""
if filter_string is None:
return ''
if simple_properties is None:
return filter_string

contextualised_filter = filter_string

for field in simple_properties:
Expand Down
22 changes: 0 additions & 22 deletions src/marqo/tensor_search/models/index_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,28 +83,6 @@ def get_model_properties(self) -> dict:
index_defaults=index_defaults, model_name=self.model_name
)

def get_possible_tensor_fields(self) -> set:
    """Return the names of all fields that are, or have the potential to be, kNN fields.

    These are:
        1. text fields
        2. object fields (any field whose mapping has a "properties" key)

    Fields whose name starts with ``enums.TensorField.vector_prefix``, or whose
    name is one of the values in ``enums.TensorField.__dict__``, are skipped.

    TODO: consider keeping track of fields that have just been marked tensor fields
        - Perhaps this would optimise the filtering step

    Returns:
        A set of field names taken from ``self.properties``.
    """
    possible_tensor_fields = set()
    reserved_names = enums.TensorField.__dict__.values()
    for field_name, field_props in self.properties.items():
        if field_name.startswith(enums.TensorField.vector_prefix):
            continue
        if field_name in reserved_names:
            continue
        # Use .get() rather than ["type"]: a mapping entry that carries only
        # "properties" and no "type" key must fall through to the object-field
        # branch below instead of raising KeyError.
        # NOTE(review): presumably object fields can omit "type" in the index
        # mapping -- confirm against the mapping this class is built from.
        if field_props.get("type") == enums.OpenSearchDataType.text:
            possible_tensor_fields.add(field_name)
        elif "properties" in field_props:
            # TODO: this couples all opensearch object fields to tensor fields
            #   (when this may not be the case in the future)
            possible_tensor_fields.add(field_name)
    return possible_tensor_fields


def get_true_text_properties(self) -> dict:
"""returns a dict containing only names and properties of fields that
Expand Down
70 changes: 67 additions & 3 deletions tests/tensor_search/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ def test_contextualise_user_filter(self):
f"{enums.TensorField.chunks}.spaced\\ int:[0 TO 30]"
),
( # fields with special chars
"field\\&&\\||withspecialchars:(random value)",
"field\\&&\\||withspecialchars:(random \\+value)",
["field&&||withspecialchars"],
f"{enums.TensorField.chunks}.field\\&&\\||withspecialchars:(random value)"
f"{enums.TensorField.chunks}.field\\&&\\||withspecialchars:(random \\+value)"
),
( # field in string not in properties
"field_not_in_properties:random AND normal_field:3",
Expand Down Expand Up @@ -133,7 +133,11 @@ def test_build_searchable_attributes_filter(self):
(["field:with:colons"],
f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(field\\:with\\:colons)"),
# searchable attribute with parenthesis in it
(["field(with)parenthesis"],
f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(field\\(with\\)parenthesis)"),
# searchable attribute with special characters in it
(["field\\with&&special+characters"],
f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(field\\\\with\\&&special\\+characters)"),
([], ""),
(None, "")
]
Expand All @@ -146,20 +150,80 @@ def test_build_tensor_search_filter(self):
test_cases = (
{
"filter_string": "abc:(some text)",
"simple_properties": {"abc": "xyz"},
"simple_properties": {"abc": {'type': 'text'}},
"searchable_attributes": ["abc"],
"expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(abc)) AND ({enums.TensorField.chunks}.abc:(some text))"
},
# parenthesis in searchable attribute
{
"filter_string": "abc:(some text)",
"simple_properties": {"abc": {'type': 'text'}},
"searchable_attributes": ["abc(with)parenthesis"],
"expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(abc\\(with\\)parenthesis)) AND ({enums.TensorField.chunks}.abc:(some text))"
},
# empty searchable attributes
{
"filter_string": "abc:(some text)",
"simple_properties": {"abc": {'type': 'text'}},
"searchable_attributes": [],
"expected": f"{enums.TensorField.chunks}.abc:(some text)"
},
# None searchable attributes
{
"filter_string": "abc:(some text)",
"simple_properties": {"abc": {'type': 'text'}},
"searchable_attributes": None,
"expected": f"{enums.TensorField.chunks}.abc:(some text)"
},
# parenthesis in filter string (escaped)
{
"filter_string": "abc\\(:(some te\\)xt)",
"simple_properties": {"abc(": {'type': 'text'}},
"searchable_attributes": ["def"],
"expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)) AND ({enums.TensorField.chunks}.abc\\(:(some te\\)xt))"
},
# empty filter string
{
"filter_string": "",
"simple_properties": {"abc": {'type': 'text'}},
"searchable_attributes": ["def"],
"expected": f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)"
},
# None filter string
{
"filter_string": None,
"simple_properties": {"abc": {'type': 'text'}},
"searchable_attributes": ["def"],
"expected": f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)"
},
# : in searchable attribute and filter string
{
"filter_string": "colon\\:here:(some text)",
"simple_properties": {"colon:here": {'type': 'text'}},
"searchable_attributes": ["colon:here:also"],
"expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(colon\\:here\\:also)) AND ({enums.TensorField.chunks}.colon\\:here:(some text))"
},
# empty simple properties
{
"filter_string": "abc:(some text)", # chunks prefix will NOT be added
"simple_properties": {},
"searchable_attributes": ["def"],
"expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)) AND (abc:(some text))"
},
# None simple properties
{
"filter_string": "abc:(some text)", # chunks prefix will NOT be added
"simple_properties": None,
"searchable_attributes": ["def"],
"expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)) AND (abc:(some text))"
},
# empty all
{
"filter_string": "",
"simple_properties": {},
"searchable_attributes": [],
"expected": ""
}
)
for case in test_cases:
tensor_search_filter = filtering.build_tensor_search_filter(
Expand Down

0 comments on commit 333e0c0

Please sign in to comment.