diff --git a/src/marqo/tensor_search/filtering.py b/src/marqo/tensor_search/filtering.py index f2a0476b5..98bd00c13 100644 --- a/src/marqo/tensor_search/filtering.py +++ b/src/marqo/tensor_search/filtering.py @@ -98,6 +98,9 @@ def contextualise_user_filter(filter_string: Optional[str], simple_properties: t """ if filter_string is None: return '' + if simple_properties is None: + return filter_string + contextualised_filter = filter_string for field in simple_properties: diff --git a/src/marqo/tensor_search/models/index_info.py b/src/marqo/tensor_search/models/index_info.py index 2528837d8..6b2a5f7f5 100644 --- a/src/marqo/tensor_search/models/index_info.py +++ b/src/marqo/tensor_search/models/index_info.py @@ -83,28 +83,6 @@ def get_model_properties(self) -> dict: index_defaults=index_defaults, model_name=self.model_name ) - def get_possible_tensor_fields(self) -> set: - """returns all fields that have are, or have the potential to be, kNN fields - - These are: - 1. text fields - 2. object fields - - TODO: consider keeping track of fields that have just been marked tensor fields - - Perhaps this would optimise the filtering step - """ - possible_tensor_fields = set() - for text_field, text_props in self.properties.items(): - if not text_field.startswith( - enums.TensorField.vector_prefix) and not text_field in enums.TensorField.__dict__.values(): - if text_props["type"] == enums.OpenSearchDataType.text: - possible_tensor_fields.add(text_field) - elif "properties" in text_props: - # TODO: this couples all opensearch object fields to tensor fields - # (when this may not be the case in the future) - possible_tensor_fields.add(text_field) - return possible_tensor_fields - def get_true_text_properties(self) -> dict: """returns a dict containing only names and properties of fields that diff --git a/tests/tensor_search/test_filtering.py b/tests/tensor_search/test_filtering.py index 4a4f66f8c..cde10c9e8 100644 --- a/tests/tensor_search/test_filtering.py +++ b/tests/tensor_search/test_filtering.py @@ -24,9 +24,9 @@ def test_contextualise_user_filter(self): f"{enums.TensorField.chunks}.spaced\\ int:[0 TO 30]" ), ( # fields with special chars - "field\\&&\\||withspecialchars:(random value)", + "field\\&&\\||withspecialchars:(random \\+value)", ["field&&||withspecialchars"], - f"{enums.TensorField.chunks}.field\\&&\\||withspecialchars:(random value)" + f"{enums.TensorField.chunks}.field\\&&\\||withspecialchars:(random \\+value)" ), ( # field in string not in properties "field_not_in_properties:random AND normal_field:3", @@ -133,7 +133,11 @@ def test_build_searchable_attributes_filter(self): (["field:with:colons"], f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(field\\:with\\:colons)"), # searchable attribute with parenthesis in it + (["field(with)parenthesis"], + f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(field\\(with\\)parenthesis)"), # searchable attribute with special characters in it + (["field\\with&&special+characters"], + f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(field\\\\with\\&&special\\+characters)"), ([], ""), (None, "") ] @@ -146,20 +150,80 @@ def test_build_tensor_search_filter(self): test_cases = ( { "filter_string": "abc:(some text)", - "simple_properties": {"abc": "xyz"}, + "simple_properties": {"abc": {'type': 'text'}}, "searchable_attributes": ["abc"], "expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(abc)) AND ({enums.TensorField.chunks}.abc:(some text))" }, # parenthesis in searchable attribute + { + "filter_string": "abc:(some text)", + "simple_properties": {"abc": {'type': 'text'}}, + "searchable_attributes": ["abc(with)parenthesis"], + "expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(abc\\(with\\)parenthesis)) AND ({enums.TensorField.chunks}.abc:(some text))" + }, # empty searchable attributes + { + "filter_string": "abc:(some text)", + "simple_properties": {"abc": {'type': 'text'}}, + "searchable_attributes": [], + "expected": f"{enums.TensorField.chunks}.abc:(some text)" + }, # None searchable attributes + { + "filter_string": "abc:(some text)", + "simple_properties": {"abc": {'type': 'text'}}, + "searchable_attributes": None, + "expected": f"{enums.TensorField.chunks}.abc:(some text)" + }, # parenthesis in filter string (escaped) + { + "filter_string": "abc\\(:(some te\\)xt)", + "simple_properties": {"abc(": {'type': 'text'}}, + "searchable_attributes": ["def"], + "expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)) AND ({enums.TensorField.chunks}.abc\\(:(some te\\)xt))" + }, # empty filter string + { + "filter_string": "", + "simple_properties": {"abc": {'type': 'text'}}, + "searchable_attributes": ["def"], + "expected": f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)" + }, # None filter string + { + "filter_string": None, + "simple_properties": {"abc": {'type': 'text'}}, + "searchable_attributes": ["def"], + "expected": f"{enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)" + }, # : in searchable attribute and filter string + { + "filter_string": "colon\\:here:(some text)", + "simple_properties": {"colon:here": {'type': 'text'}}, + "searchable_attributes": ["colon:here:also"], + "expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(colon\\:here\\:also)) AND ({enums.TensorField.chunks}.colon\\:here:(some text))" + }, # empty simple properties + { + "filter_string": "abc:(some text)", # chunks prefix will NOT be added + "simple_properties": {}, + "searchable_attributes": ["def"], + "expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)) AND (abc:(some text))" + }, # None simple properties + { + "filter_string": "abc:(some text)", # chunks prefix will NOT be added + "simple_properties": None, + "searchable_attributes": ["def"], + "expected": f"({enums.TensorField.chunks}.{enums.TensorField.field_name}:(def)) AND (abc:(some text))" + }, # empty all + { + "filter_string": "", + "simple_properties": {}, + "searchable_attributes": [], + "expected": "" + } ) for case in test_cases: tensor_search_filter = filtering.build_tensor_search_filter(