diff --git a/src/marqo/cloud_helpers.py b/src/marqo/cloud_helpers.py index 59df3110..3f45108f 100644 --- a/src/marqo/cloud_helpers.py +++ b/src/marqo/cloud_helpers.py @@ -3,11 +3,13 @@ from marqo.marqo_logging import mq_logger -def cloud_wait_for_index_status(req, index_name, status): - creation = req.get(f"indexes/{index_name}/status") - while creation['index_status'] != status: +def cloud_wait_for_index_status(req , index_name: str, status): + """ Wait for index to be created on Marqo Cloud by checking + it's status every 10 seconds until it becomes expected value""" + current_status = req.get(f"indexes/{index_name}/status") + while current_status['index_status'] != status: time.sleep(10) - creation = req.get(f"indexes/{index_name}/status") - mq_logger.info(f"Index creation status: {creation['index_status']}") + current_status = req.get(f"indexes/{index_name}/status") + mq_logger.info(f"Index creation status: {current_status['index_status']}") mq_logger.info("Index created successfully") return True diff --git a/src/marqo/index.py b/src/marqo/index.py index 80eaf4cb..98f877c3 100644 --- a/src/marqo/index.py +++ b/src/marqo/index.py @@ -1,6 +1,5 @@ import functools import json -import logging import pprint import time @@ -55,14 +54,16 @@ def __init__( if config.is_marqo_cloud: try: if self.get_status()["index_status"] != IndexStatus.CREATED: - logging.warning(f"Index {index_name} is not ready. Status: {self.get_status()}, operations may fail.") + mq_logger.warning(f"Index {index_name} is not ready. Status: {self.get_status()}. Common operations, " + f"such as search and add_documents, may fail until the index is ready. " + f"Please check `mq.index('{index_name}').get_status()` for the index's status. " + f"Skipping version check.") skip_version_check = True except Exception as e: skip_version_check = True + mq_logger.warning(f"Failed to get index status for index {index_name}. Skipping version check. 
Error: {e}") if not skip_version_check: self._marqo_minimum_supported_version_check() - else: - logging.warning("Version check is skipped because index is not ready yet.") def delete(self) -> Dict[str, Any]: """Delete the index. @@ -435,7 +436,6 @@ def _add_docs_organiser( f"docs (server unbatched), for an average of {(res['processingTimeMs'] / (1000 * num_docs)):.3f}s per doc.") if 'errors' in res and res['errors']: mq_logger.info(error_detected_message) - if errors_detected: mq_logger.info(error_detected_message) total_add_docs_time = timer() - t0 diff --git a/tests/marqo_test.py b/tests/marqo_test.py index f4010948..d0948ee8 100644 --- a/tests/marqo_test.py +++ b/tests/marqo_test.py @@ -105,6 +105,16 @@ def wrapper(self, *args, **kwargs): return decorator +def create_settings_hash(settings_dict, kwargs): + combined_dict = {**settings_dict, **kwargs} + combined_str = ''.join(f"{key}{value}" for key, value in combined_dict.items()) + crc32_hash = zlib.crc32(combined_str.encode()) + short_hash = hex(crc32_hash & 0xffffffff)[2:][ + :10] # Take the first 10 characters of the hexadecimal representation + print(f"Created index with settings hash: {short_hash} for settings: {combined_dict}") + return short_hash + + class MarqoTestCase(TestCase): @classmethod @@ -115,10 +125,7 @@ def setUpClass(cls) -> None: api_key = os.environ.get("MARQO_API_KEY", None) if (api_key): local_marqo_settings["api_key"] = api_key - cls.index_suffix = os.environ.get("MARQO_INDEX_SUFFIX", None) - if not cls.index_suffix: - os.environ["MARQO_INDEX_SUFFIX"] = str(uuid.uuid4())[:8] - cls.index_suffix = os.environ["MARQO_INDEX_SUFFIX"] + cls.index_suffix = os.environ.get("MARQO_INDEX_SUFFIX", "") cls.client_settings = local_marqo_settings cls.authorized_url = cls.client_settings["url"] cls.generic_test_index_name = 'test-index' @@ -133,32 +140,35 @@ def tearDownClass(cls) -> None: """ client = marqo.Client(**cls.client_settings) for index in client.get_indexes()['results']: - if not 
client.config.is_marqo_cloud: - try: - index.delete() - except marqo.errors.MarqoApiError as e: - logging.debug(f'received error `{e}` from index deletion request.') + if index.index_name.startswith(cls.generic_test_index_name): + if not client.config.is_marqo_cloud: + try: + index.delete() + except marqo.errors.MarqoApiError as e: + logging.debug(f'received error `{e}` from index deletion request.') def setUp(self) -> None: self.client = Client(**self.client_settings) - for index in self.client.get_indexes()['results']: - if not self.client.config.is_marqo_cloud: - try: - index.delete() - except marqo.errors.MarqoApiError as e: - logging.debug(f'received error `{e}` from index deletion request.') - else: - self.cleanup_documents_from_all_indices() + if self.client.config.is_marqo_cloud: + self.cleanup_documents_from_all_indices() + else: + for index in self.client.get_indexes()['results']: + if index.index_name.startswith(self.generic_test_index_name): + try: + index.delete() + except marqo.errors.MarqoApiError as e: + logging.debug(f'received error `{e}` from index deletion request.') def tearDown(self) -> None: - for index in self.client.get_indexes()['results']: - if not self.client.config.is_marqo_cloud: - try: - index.delete() - except marqo.errors.MarqoApiError as e: - logging.debug(f'received error `{e}` from index deletion request.') - else: - self.cleanup_documents_from_all_indices() + if self.client.config.is_marqo_cloud: + self.cleanup_documents_from_all_indices() + else: + for index in self.client.get_indexes()['results']: + if index.index_name.startswith(self.generic_test_index_name): + try: + index.delete() + except marqo.errors.MarqoApiError as e: + logging.debug(f'received error `{e}` from index deletion request.') def warm_request(self, func, *args, **kwargs): ''' @@ -170,33 +180,28 @@ def warm_request(self, func, *args, **kwargs): func(*args, **kwargs) def create_cloud_index(self, index_name, settings_dict=None, **kwargs): - def 
create_settings_hash(): - combined_dict = {**settings_dict, **kwargs} - combined_str = ''.join(f"{key}{value}" for key, value in combined_dict.items()) - crc32_hash = zlib.crc32(combined_str.encode()) - short_hash = hex(crc32_hash & 0xffffffff)[2:][ - :10] # Take the first 10 characters of the hexadecimal representation - print(f"Created index with settings hash: {short_hash} for settings: {combined_dict}") - return short_hash - client = marqo.Client(**self.client_settings) settings_dict = settings_dict if settings_dict else {} index_name = f"{index_name}-{self.index_suffix}" if settings_dict or kwargs: - index_name = f"{index_name}-{create_settings_hash()}" + index_name = f"{index_name}-{create_settings_hash(settings_dict, kwargs)}" settings_dict.update({ "inference_type": "marqo.CPU", "storage_class": "marqo.basic", "model": "hf/all_datasets_v4_MiniLM-L6" }) - while True: - try: - if client.http.get(f"/indexes/{index_name}/status")["index_status"] == "READY": - break - except Exception as e: - pass + try: + status = client.http.get(f"/indexes/{index_name}/status")["index_status"] + if status == "CREATING": + while status == "CREATING": + time.sleep(10) + status = client.http.get(f"/indexes/{index_name}/status")["index_status"] + if status != "READY": + self.client.create_index(index_name, settings_dict=settings_dict, **kwargs) + except Exception as e: self.client.create_index(index_name, settings_dict=settings_dict, **kwargs) return index_name - def create_test_index(self, index_name, settings_dict=None, **kwargs): + def create_test_index(self, index_name: str, settings_dict: dict = None, **kwargs): + """Create a test index with the given name and settings and triggers specific logic if index is cloud index""" client = marqo.Client(**self.client_settings) if client.config.is_marqo_cloud: index_name = self.create_cloud_index(index_name, settings_dict, **kwargs) @@ -208,9 +213,10 @@ def cleanup_documents_from_all_indices(self): client = 
marqo.Client(**self.client_settings) indexes = client.get_indexes() for index in indexes['results']: - if self.index_suffix in index.index_name.split('-'): + if index.index_name.startswith(self.generic_test_index_name) and \ + self.index_suffix in index.index_name.split('-'): if client.http.get(f"/indexes/{index.index_name}/status")["index_status"] == "READY": - docs_to_delete = [i['_id'] for i in index.search("")['hits']] + docs_to_delete = [i['_id'] for i in index.search("", limit=100)['hits']] while docs_to_delete: index.delete_documents(docs_to_delete, auto_refresh=True) - docs_to_delete = [i['_id'] for i in index.search("")['hits']] + docs_to_delete = [i['_id'] for i in index.search("", limit=100)['hits']] \ No newline at end of file diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/scripts/create_test_suffix.py b/tests/scripts/create_test_suffix.py new file mode 100644 index 00000000..064dd3ef --- /dev/null +++ b/tests/scripts/create_test_suffix.py @@ -0,0 +1,8 @@ +import os +import uuid + + +def set_index_suffix(): + index_suffix = os.environ.get("MARQO_INDEX_SUFFIX", None) + if not index_suffix: + os.environ["MARQO_INDEX_SUFFIX"] = str(uuid.uuid4())[:8] \ No newline at end of file diff --git a/tests/scripts/delete_all_indexes.py b/tests/scripts/delete_all_indexes.py index 6c1a1c18..8441010b 100644 --- a/tests/scripts/delete_all_indexes.py +++ b/tests/scripts/delete_all_indexes.py @@ -3,22 +3,22 @@ import marqo -def cleanup_documents_from_all_indices(): +def delete_all_test_indices(): local_marqo_settings = { "url": os.environ.get("MARQO_URL", 'http://localhost:8882'), } + suffix = os.environ.get("MARQO_INDEX_SUFFIX", None) api_key = os.environ.get("MARQO_API_KEY", None) if api_key: local_marqo_settings["api_key"] = api_key client = marqo.Client(**local_marqo_settings) indexes = client.get_indexes() for index in indexes['results']: - if client.config.is_marqo_cloud: - if 
index.get_status()["index_status"] == marqo.enums.IndexStatus.CREATED: - index.delete() - else: - index.delete() + if index.index_name.startswith('test-index'): + if suffix is not None and suffix in index.index_name.split('-'): + if index.get_status()["index_status"] == marqo.enums.IndexStatus.CREATED: + index.delete() if __name__ == '__main__': - cleanup_documents_from_all_indices() + delete_all_test_indices() diff --git a/tests/scripts/run_cloud_tests.py b/tests/scripts/run_cloud_tests.py new file mode 100644 index 00000000..552c8be2 --- /dev/null +++ b/tests/scripts/run_cloud_tests.py @@ -0,0 +1,18 @@ +import sys +from create_test_suffix import set_index_suffix +from delete_all_indexes import delete_all_test_indices + +if __name__ == '__main__': + # Generate the random suffix + set_index_suffix() + + # Run the first command to generate the suffix (already done) + # generate_index_suffix.py will set the TEST_INDEX_SUFFIX environment variable + + # Run the second command with the generated suffix and pass posargs to pytest + import pytest + pytest_args = ['tests/', '-m', 'not ignore_cloud_tests'] + sys.argv[1:] + pytest.main(pytest_args) + + # Run the third command that uses the suffix + delete_all_test_indices() diff --git a/tests/v0_tests/test_add_documents.py b/tests/v0_tests/test_add_documents.py index 7059ad58..738a4fa7 100644 --- a/tests/v0_tests/test_add_documents.py +++ b/tests/v0_tests/test_add_documents.py @@ -16,7 +16,7 @@ class TestAddDocuments(MarqoTestCase): # Create index tests def test_create_index(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) def test_create_index_double(self): if not self.client.config.is_marqo_cloud: @@ -24,12 +24,14 @@ def test_create_index_double(self): try: self.create_test_index(index_name=self.generic_test_index_name) except MarqoError as e: - assert e.code in ["index_already_exists", 
"index_already_exists_cloud"] + assert e.code == "index_already_exists_cloud" + except MarqoWebError as e: + assert e.code == "index_already_exists" def test_create_index_hnsw(self): if not self.client.config.is_marqo_cloud: self.client.delete_index(self.generic_test_index_name) - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name, settings_dict={ + test_index_name = self.create_test_index(index_name=self.generic_test_index_name, settings_dict={ "index_defaults": { "ann_parameters": { "parameters": { @@ -38,40 +40,42 @@ def test_create_index_hnsw(self): } } }) - assert self.client.get_index(self.test_index_name).get_settings() \ + assert self.client.get_index(test_index_name).get_settings() \ ["index_defaults"]["ann_parameters"]["parameters"]["m"] == 24 # Ensure non-specified values are in default - assert self.client.get_index(self.test_index_name).get_settings() \ + assert self.client.get_index(test_index_name).get_settings() \ ["index_defaults"]["ann_parameters"]["parameters"]["ef_construction"] == 128 - assert self.client.get_index(self.test_index_name).get_settings() \ + assert self.client.get_index(test_index_name).get_settings() \ ["index_defaults"]["ann_parameters"]["space_type"] == "cosinesimil" # Delete index tests: - # def test_delete_index(self): - # self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - # self.client.delete_index(self.test_index_name) - # self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + def test_delete_index(self): + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + self.client.delete_index(test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) # Get index tests: def test_get_index(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - index = self.client.get_index(self.test_index_name) - assert 
index.index_name == self.test_index_name + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + index = self.client.get_index(test_index_name) + assert index.index_name == test_index_name def test_get_index_non_existent(self): try: index = self.client.get_index("some-non-existent-index") raise AssertionError except MarqoError as e: - assert e.code in ["index_not_found", "index_not_found_cloud"] + assert e.code == "index_not_found_cloud" + except MarqoWebError as e: + assert e.code == "index_not_found" # Add documents tests: def test_add_documents_with_ids(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "doc title": "Cool Document 1", "field 1": "some extra info", @@ -82,18 +86,18 @@ def test_add_documents_with_ids(self): "field X": "this is a solid doc", "_id": "123456" } - res = self.client.index(self.test_index_name).add_documents([ + res = self.client.index(test_index_name).add_documents([ d1, d2 ], tensor_fields=["field X", "field 1", "doc title"]) - retrieved_d1 = self.client.index(self.test_index_name).get_document( + retrieved_d1 = self.client.index(test_index_name).get_document( document_id="e197e580-0393-4f4e-90e9-8cdf4b17e339") assert retrieved_d1 == d1 - retrieved_d2 = self.client.index(self.test_index_name).get_document(document_id="123456") + retrieved_d2 = self.client.index(test_index_name).get_document(document_id="123456") assert retrieved_d2 == d2 def test_add_documents(self): """indexes the documents and retrieves the documents with the generated IDs""" - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "doc title": "Cool Document 1", "field 1": "some extra info" @@ -102,37 +106,37 @@ def test_add_documents(self): "doc title": "Just Your Average Doc", 
"field X": "this is a solid doc" } - res = self.client.index(self.test_index_name).add_documents([d1, d2], tensor_fields=["field X", "field 1", "doc title"]) + res = self.client.index(test_index_name).add_documents([d1, d2], tensor_fields=["field X", "field 1", "doc title"]) ids = [item["_id"] for item in res["items"]] assert len(ids) == 2 assert ids[0] != ids[1] - retrieved_d0 = self.client.index(self.test_index_name).get_document(ids[0]) - retrieved_d1 = self.client.index(self.test_index_name).get_document(ids[1]) + retrieved_d0 = self.client.index(test_index_name).get_document(ids[0]) + retrieved_d1 = self.client.index(test_index_name).get_document(ids[1]) del retrieved_d0["_id"] del retrieved_d1["_id"] assert retrieved_d0 == d1 or retrieved_d0 == d2 assert retrieved_d1 == d1 or retrieved_d1 == d2 def test_add_documents_with_ids_twice(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "doc title": "Just Your Average Doc", "field X": "this is a solid doc", "_id": "56" } - self.client.index(self.test_index_name).add_documents([d1], tensor_fields=["field X", "doc title"]) - assert d1 == self.client.index(self.test_index_name).get_document("56") + self.client.index(test_index_name).add_documents([d1], tensor_fields=["field X", "doc title"]) + assert d1 == self.client.index(test_index_name).get_document("56") d2 = { "_id": "56", "completely": "different doc.", "field X": "this is a solid doc" } - self.client.index(self.test_index_name).add_documents([d2], tensor_fields=["field X", "completely"]) - assert d2 == self.client.index(self.test_index_name).get_document("56") + self.client.index(test_index_name).add_documents([d2], tensor_fields=["field X", "completely"]) + assert d2 == self.client.index(test_index_name).get_document("56") def test_add_batched_documents(self): - self.test_index_name = 
self.create_test_index(self.generic_test_index_name) - ix = self.client.index(index_name=self.test_index_name) + test_index_name = self.create_test_index(self.generic_test_index_name) + ix = self.client.index(index_name=test_index_name) doc_ids = [str(num) for num in range(0, 100)] docs = [ @@ -147,7 +151,7 @@ def test_add_batched_documents(self): for _id in [0, 19, 20, 99]: original_doc = docs[_id].copy() assert ix.get_document(document_id=str(_id)) == original_doc - assert self.client.index(index_name=self.test_index_name).get_stats()['numberOfDocuments'] == 100 + assert self.client.index(index_name=test_index_name).get_stats()['numberOfDocuments'] == 100 def test_add_documents_long_fields(self): """TODO @@ -159,49 +163,49 @@ def test_update_docs_updates_chunks(self): # delete documents tests: def test_delete_docs(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents([ + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + self.client.index(test_index_name).add_documents([ {"abc": "wow camel", "_id": "123"}, {"abc": "camels are cool", "_id": "foo"} ], tensor_fields=["abc"]) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, "wow camel") + self.warm_request(self.client.index(test_index_name).search, "wow camel") - res0 = self.client.index(self.test_index_name).search("wow camel") + res0 = self.client.index(test_index_name).search("wow camel") print("res0res0") pprint.pprint(res0) assert res0['hits'][0]["_id"] == "123" assert len(res0['hits']) == 2 - self.client.index(self.test_index_name).delete_documents(["123"]) + self.client.index(test_index_name).delete_documents(["123"]) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, "wow camel") - res1 = self.client.index(self.test_index_name).search("wow camel") + 
self.warm_request(self.client.index(test_index_name).search, "wow camel") + res1 = self.client.index(test_index_name).search("wow camel") assert res1['hits'][0]["_id"] == "foo" assert len(res1['hits']) == 1 def test_delete_docs_empty_ids(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents([{"abc": "efg", "_id": "123"}], tensor_fields=["abc"]) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + self.client.index(test_index_name).add_documents([{"abc": "efg", "_id": "123"}], tensor_fields=["abc"]) try: - self.client.index(self.test_index_name).delete_documents([]) + self.client.index(test_index_name).delete_documents([]) raise AssertionError except MarqoWebError as e: assert "can't be empty" in str(e) or "value_error.missing" in str(e) - res = self.client.index(self.test_index_name).get_document("123") + res = self.client.index(test_index_name).get_document("123") print(res) assert "abc" in res def test_get_document(self): my_doc = {"abc": "efg", "_id": "123"} - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents([my_doc], tensor_fields=["abc"]) - retrieved = self.client.index(self.test_index_name).get_document(document_id='123') + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + self.client.index(test_index_name).add_documents([my_doc], tensor_fields=["abc"]) + retrieved = self.client.index(test_index_name).get_document(document_id='123') assert retrieved == my_doc def test_add_documents_missing_index_fails(self): - with pytest.raises(MarqoError) as ex: + with pytest.raises((MarqoError, MarqoWebError)) as ex: self.client.index("some-non-existing-index").add_documents([{"abd": "efg"}], tensor_fields=["abc"]) assert ex.value.code in ["index_not_found", "index_not_found_cloud"] @@ -298,28 +302,28 @@ def run(): 
assert "processes=12" not in kwargs["path"] def test_resilient_indexing(self): - self.test_index_name = self.create_test_index(self.generic_test_index_name) + test_index_name = self.create_test_index(self.generic_test_index_name) if self.IS_MULTI_INSTANCE: time.sleep(1) - assert 0 == self.client.index(self.test_index_name).get_stats()['numberOfDocuments'] + assert 0 == self.client.index(test_index_name).get_stats()['numberOfDocuments'] d1 = {"d1": "blah", "_id": "1234"} d2 = {"d2": "blah", "_id": "5678"} docs = [d1, {"content": "some terrible doc", "d3": "blah", "_id": 12345}, d2] - self.client.index(self.test_index_name).add_documents(documents=docs, tensor_fields=["d1", "d2", "d3", "content"]) + self.client.index(test_index_name).add_documents(documents=docs, tensor_fields=["d1", "d2", "d3", "content"]) if self.IS_MULTI_INSTANCE: time.sleep(1) - assert 2 == self.client.index(self.test_index_name).get_stats()['numberOfDocuments'] - assert d1 == self.client.index(self.test_index_name).get_document(document_id='1234') - assert d2 == self.client.index(self.test_index_name).get_document(document_id='5678') + assert 2 == self.client.index(test_index_name).get_stats()['numberOfDocuments'] + assert d1 == self.client.index(test_index_name).get_document(document_id='1234') + assert d2 == self.client.index(test_index_name).get_document(document_id='5678') if self.IS_MULTI_INSTANCE: time.sleep(1) assert {"1234", "5678"} == {d['_id'] for d in - self.client.index(self.test_index_name).search("blah", limit=3)['hits']} + self.client.index(test_index_name).search("blah", limit=3)['hits']} def test_batching_add_docs(self): @@ -365,32 +369,32 @@ def run(): def test_add_lists_non_tensor(self): original_doc = {"d1": "blah", "_id": "1234", 'my list': ['tag-1', 'tag-2']} - self.test_index_name = self.create_test_index(self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents(documents=[original_doc], non_tensor_fields=['my list']) + test_index_name = 
self.create_test_index(self.generic_test_index_name) + self.client.index(test_index_name).add_documents(documents=[original_doc], non_tensor_fields=['my list']) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, q='something', filter_string='my\ list:tag-1' ) - res = self.client.index(self.test_index_name).search( + res = self.client.index(test_index_name).search( q='something', filter_string='my\ list:tag-1' ) assert res['hits'][0]['_id'] == '1234' if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, q='something', filter_string='my\ list:tag-non-existent' ) - bad_res = self.client.index(self.test_index_name).search( + bad_res = self.client.index(test_index_name).search( q='something', filter_string='my\ list:tag-non-existent' ) assert len(bad_res['hits']) == 0 def test_use_existing_fields(self): - self.test_index_name = self.create_test_index(self.generic_test_index_name) - self.client.index(index_name=self.test_index_name).add_documents( + test_index_name = self.create_test_index(self.generic_test_index_name) + self.client.index(index_name=test_index_name).add_documents( documents=[ { "_id": "123", @@ -404,11 +408,11 @@ def test_use_existing_fields(self): assert {"title 1", "_embedding", "old"} == functools.reduce( lambda x, y: x.union(y), [set(facet.keys()) for facet in - self.client.index(index_name=self.test_index_name).get_document( + self.client.index(index_name=test_index_name).get_document( document_id="123", expose_facets=True)["_tensor_facets"]] ) - self.client.index(index_name=self.test_index_name).add_documents( + self.client.index(index_name=test_index_name).add_documents( documents=[ { "_id": "123", @@ -421,7 +425,7 @@ def test_use_existing_fields(self): assert {"title 1", "_embedding", "new f"} == functools.reduce( lambda x, y: x.union(y), 
[set(facet.keys()) for facet in - self.client.index(index_name=self.test_index_name).get_document( + self.client.index(index_name=test_index_name).get_document( document_id="123", expose_facets=True)["_tensor_facets"]] ) @@ -430,9 +434,9 @@ def test_multimodal_combination_doc(self): "treat_urls_and_pointers_as_images": True, "model": "ViT-B/32", } - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name, **settings) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name, **settings) - self.client.index(index_name=self.test_index_name).add_documents( + self.client.index(index_name=test_index_name).add_documents( documents=[ { "combo_text_image": { @@ -465,50 +469,50 @@ def test_multimodal_combination_doc(self): }}}, auto_refresh=True, tensor_fields=["combo_text_image", "space field"]) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "A rider is riding a horse jumping over the barrier_0", search_method="lexical") - lexical_res = self.client.index(self.test_index_name).search( + lexical_res = self.client.index(test_index_name).search( "A rider is riding a horse jumping over the barrier_0", search_method="lexical") assert lexical_res["hits"][0]["_id"] == "111" # a space at the end if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "", filter_string="combo_text_image.text_0\ : (A rider is riding a horse jumping over the barrier_0.)") - filtering_res = self.client.index(self.test_index_name).search( + filtering_res = self.client.index(test_index_name).search( "", filter_string="combo_text_image.text_0\ : (A rider is riding a horse jumping over the barrier_0.)") assert filtering_res["hits"][0]["_id"] == "111" if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, 
"") + self.warm_request(self.client.index(test_index_name).search, "") - tensor_res = self.client.index(self.test_index_name).search("") + tensor_res = self.client.index(test_index_name).search("") assert tensor_res["hits"][0]["_id"] == "111" if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "search this with space", search_method="lexical") - space_lexical_res = self.client.index(self.test_index_name).search( + space_lexical_res = self.client.index(test_index_name).search( "search this with space", search_method="lexical") assert space_lexical_res["hits"][0]["_id"] == "111" # A space in the middle if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "", filter_string="space\ field.space\ child\ 1:(search this with space)") - space_filtering_res = self.client.index(self.test_index_name).search( + space_filtering_res = self.client.index(test_index_name).search( "", filter_string="space\ field.space\ child\ 1:(search this with space)") assert space_filtering_res["hits"][0]["_id"] == "111" if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, "") + self.warm_request(self.client.index(test_index_name).search, "") - space_tensor_res = self.client.index(self.test_index_name).search("") + space_tensor_res = self.client.index(test_index_name).search("") assert space_tensor_res["hits"][0]["_id"] == "111" def test_add_docs_image_download_headers(self): @@ -533,6 +537,6 @@ def test_add_docs_logs_deprecation_warning_if_non_tensor_fields(self): non_tensor_fields = ['text'] with self.assertLogs('marqo', level='WARNING') as cm: - self.test_index_name = self.create_test_index(self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents(documents=documents, non_tensor_fields=non_tensor_fields) + 
test_index_name = self.create_test_index(self.generic_test_index_name) + self.client.index(test_index_name).add_documents(documents=documents, non_tensor_fields=non_tensor_fields) self.assertTrue({'`non_tensor_fields`', 'Marqo', '2.0.0.'}.issubset(set(cm.output[0].split(" ")))) diff --git a/tests/v0_tests/test_client.py b/tests/v0_tests/test_client.py index 10b84b7d..593ad52d 100644 --- a/tests/v0_tests/test_client.py +++ b/tests/v0_tests/test_client.py @@ -52,20 +52,20 @@ def test_error_handling_in_health_check(self): self.assertIn("If you are trying to check the health on Marqo Cloud", cm.exception.message) def test_check_index_health_response(self): - self.test_index_name = self.create_test_index(self.generic_test_index_name) - res = self.client.index(self.test_index_name).health() + test_index_name = self.create_test_index(self.generic_test_index_name) + res = self.client.index(test_index_name).health() assert 'status' in res assert 'status' in res['backend'] def test_check_index_health_query(self): + test_index_name = self.create_test_index(self.generic_test_index_name) with patch("marqo._httprequests.HttpRequests.get") as mock_get: - self.test_index_name = self.create_test_index(self.generic_test_index_name) - res = self.client.index(self.test_index_name).health() + res = self.client.index(test_index_name).health() args, kwargs = mock_get.call_args - self.assertIn(f"/{self.test_index_name}/health", kwargs["path"]) + self.assertIn(f"health", kwargs["path"]) def test_overwrite_cloud_url_and_client_is_set_to_marqo(self): - current = os.environ.get("MARQO_CLOUD_URL") + current = os.environ.get("MARQO_CLOUD_URL", "api.marqo.ai") os.environ["MARQO_CLOUD_URL"] = "https://cloud.url.com" client = Client(url="https://cloud.url.com", api_key="test") self.assertTrue(client.config.is_marqo_cloud) diff --git a/tests/v0_tests/test_custom_vector_search.py b/tests/v0_tests/test_custom_vector_search.py index 7a4f4e50..0b238c44 100644 --- 
a/tests/v0_tests/test_custom_vector_search.py +++ b/tests/v0_tests/test_custom_vector_search.py @@ -3,6 +3,7 @@ from marqo.client import Client from marqo.errors import MarqoApiError, MarqoWebError from tests.marqo_test import MarqoTestCase +from pytest import mark class TestCustomVectorSearch(MarqoTestCase): @@ -121,6 +122,8 @@ def test_context_vector_with_flat_query(self): ## Ensure other tests are not affected self.query = {"What are the best pets": 1} + +@mark.ignore_cloud_tests class TestCustomBulkVectorSearch(TestCustomVectorSearch): def search_with_context(self, context_vector: Optional[Dict[str, List[Dict[str, Any]]]] = None) -> Dict[str, Any]: diff --git a/tests/v0_tests/test_demos.py b/tests/v0_tests/test_demos.py index be5a8d19..ea2b62ff 100644 --- a/tests/v0_tests/test_demos.py +++ b/tests/v0_tests/test_demos.py @@ -11,25 +11,26 @@ class TestDemo(MarqoTestCase): """Tests for demos. """ def setUp(self) -> None: - client_0 = Client(**self.client_settings) - for ix_name in ["cool-index-1", "my-first-index", "my-weighted-query-index", "my-first-multimodal-index"]: - try: - client_0.delete_index(ix_name) - except MarqoApiError as s: - pass + self.client = Client(**self.client_settings) + if not self.client.config.is_marqo_cloud: + for ix_name in ["cool-index-1", "my-first-index", "my-weighted-query-index", "my-first-multimodal-index"]: + try: + self.client.delete_index(ix_name) + except MarqoApiError as s: + pass def tearDown(self) -> None: - client_0 = Client(**self.client_settings) - for ix_name in ["cool-index-1", "my-first-index", "my-weighted-query-index", "my-first-multimodal-index"]: - try: - client_0.delete_index(ix_name) - except MarqoApiError as s: - pass + if not self.client.config.is_marqo_cloud: + for ix_name in ["cool-index-1", "my-first-index", "my-weighted-query-index", "my-first-multimodal-index"]: + try: + self.client.delete_index(ix_name) + except MarqoApiError as s: + pass def test_demo(self): client = Client(**self.client_settings) - 
self.create_test_index("cool-index-1") - client.index("cool-index-1").add_documents([ + test_index_name = self.create_test_index("cool-index-1") + client.index(test_index_name).add_documents([ { "Title": "The Legend of the River", "Description": "Once upon a time there was a cat who wore a hat. " @@ -50,16 +51,18 @@ def test_demo(self): print("\nSearching the phrase 'River' across all fields") if self.IS_MULTI_INSTANCE: - self.warm_request(client.index("cool-index-1").search,"River") + self.warm_request(client.index(test_index_name).search,"River") - pprint.pprint(client.index("cool-index-1").search("River")) + pprint.pprint(client.index(test_index_name).search("River")) # then we search specific searchable attributes. We can see how powerful semantic search is print("\nThen we search specific 'River over' searchable attributes. We can see how powerful semantic search is") if self.IS_MULTI_INSTANCE: - self.warm_request(client.index("cool-index-1").search,"River", searchable_attributes=["Key Points"]) + self.warm_request(client.index(test_index_name).search,"River", searchable_attributes=["Key Points"]) - pprint.pprint(client.index("cool-index-1").search("River", searchable_attributes=["Key Points"])) + pprint.pprint(client.index(test_index_name).search("River", searchable_attributes=["Key Points"])) + + self.client.delete_index(test_index_name) def test_readme_example(self): @@ -67,8 +70,8 @@ def test_readme_example(self): mq = marqo.Client(**self.client_settings) - self.create_test_index("my-first-index") - mq.index("my-first-index").add_documents( + test_index_name = self.create_test_index("my-first-index") + mq.index(test_index_name).add_documents( [ { "Title": "The Travels of Marco Polo", @@ -84,11 +87,11 @@ def test_readme_example(self): ) if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-first-index").search, + self.warm_request(mq.index(test_index_name).search, q="What is the best outfit to wear on the moon?" 
) - results = mq.index("my-first-index").search( + results = mq.index(test_index_name).search( q="What is the best outfit to wear on the moon?" ) @@ -96,7 +99,7 @@ def test_readme_example(self): assert results["hits"][0]["_id"] == "article_591" - r2 = mq.index("my-first-index").get_document(document_id="article_591") + r2 = mq.index(test_index_name).get_document(document_id="article_591") assert { "Title": "Extravehicular Mobility Unit (EMU)", "Description": "The EMU is a spacesuit that provides environmental protection, " @@ -104,34 +107,34 @@ def test_readme_example(self): "_id": "article_591" } == r2 - r3 = mq.index("my-first-index").get_stats() + r3 = mq.index(test_index_name).get_stats() assert r3["numberOfDocuments"] == 2 if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-first-index").search,'marco polo', search_method=marqo.SearchMethods.LEXICAL) + self.warm_request(mq.index(test_index_name).search,'marco polo', search_method=marqo.SearchMethods.LEXICAL) - r4 = mq.index("my-first-index").search('marco polo', search_method=marqo.SearchMethods.LEXICAL) + r4 = mq.index(test_index_name).search('marco polo', search_method=marqo.SearchMethods.LEXICAL) assert r4["hits"][0]["Title"] == "The Travels of Marco Polo" if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-first-index").search,'adventure', searchable_attributes=['Title']) + self.warm_request(mq.index(test_index_name).search,'adventure', searchable_attributes=['Title']) - r5 = mq.index("my-first-index").search('adventure', searchable_attributes=['Title']) + r5 = mq.index(test_index_name).search('adventure', searchable_attributes=['Title']) assert len(r5["hits"]) == 2 - r6 = mq.index("my-first-index").delete_documents(ids=["article_591", "article_602"]) + r6 = mq.index(test_index_name).delete_documents(ids=["article_591", "article_602"]) assert r6['details']['deletedDocuments'] == 1 - rneg1 = mq.index("my-first-index").delete() + rneg1 = mq.index(test_index_name).delete() pprint.pprint(rneg1) 
assert (rneg1["acknowledged"] is True) or (rneg1["acknowledged"] == 'true') def test_readme_example_weighted_query(self): import marqo mq = marqo.Client(**self.client_settings) - self.create_test_index("my-weighted-query-index") - mq.index("my-weighted-query-index").add_documents([ + test_index_name = self.create_test_index("my-weighted-query-index") + mq.index(test_index_name).add_documents([ { "Title": "Smartphone", "Description": "A smartphone is a portable computer device that combines mobile telephone " @@ -152,7 +155,7 @@ def test_readme_example_weighted_query(self): tensor_fields=["Title", "Description"] ) - r1 = mq.index("my-weighted-query-index").get_stats() + r1 = mq.index(test_index_name).get_stats() assert r1["numberOfDocuments"] == 3 query = { @@ -161,11 +164,11 @@ def test_readme_example_weighted_query(self): } if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-weighted-query-index").search, + self.warm_request(mq.index(test_index_name).search, q=query, searchable_attributes=["Title", "Description"] ) - r2 = mq.index("my-weighted-query-index").search( + r2 = mq.index(test_index_name).search( q=query, searchable_attributes=["Title", "Description"] ) @@ -179,11 +182,11 @@ def test_readme_example_weighted_query(self): } if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-weighted-query-index").search, + self.warm_request(mq.index(test_index_name).search, q=query, searchable_attributes=["Title", "Description"] ) - r3 = mq.index("my-weighted-query-index").search( + r3 = mq.index(test_index_name).search( q=query, searchable_attributes=["Title", "Description"] ) print("\nQuery 2:") @@ -198,7 +201,7 @@ def test_readme_example_weighted_query(self): assert len(r2["hits"]) == 3 assert len(r3["hits"]) == 3 - rneg1 = mq.index("my-weighted-query-index").delete() + rneg1 = mq.index(test_index_name).delete() pprint.pprint(rneg1) assert (rneg1["acknowledged"] is True) or (rneg1["acknowledged"] == 'true') @@ -206,8 +209,8 @@ def 
test_readme_example_multimodal_combination_query(self): import marqo mq = marqo.Client(**self.client_settings) settings = {"treat_urls_and_pointers_as_images": True, "model": "ViT-B/32"} - self.create_test_index("my-first-multimodal-index", **settings) - mq.index("my-first-multimodal-index").add_documents( + test_index_name = self.create_test_index("my-first-multimodal-index", **settings) + mq.index(test_index_name).add_documents( [ { "Title": "Flying Plane", @@ -243,16 +246,16 @@ def test_readme_example_multimodal_combination_query(self): tensor_fields=["captioned_image"], ) - r1 = mq.index("my-first-multimodal-index").get_stats() + r1 = mq.index(test_index_name).get_stats() assert r1["numberOfDocuments"] == 3 if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-first-multimodal-index").search, + self.warm_request(mq.index(test_index_name).search, q="Give me some images of vehicles and modes of transport. I am especially interested in air travel and commercial aeroplanes.", searchable_attributes=["captioned_image"] ) - r2 = mq.index("my-first-multimodal-index").search( + r2 = mq.index(test_index_name).search( q="Give me some images of vehicles and modes of transport. 
I am especially interested in air travel and commercial aeroplanes.", searchable_attributes=["captioned_image"], ) @@ -262,14 +265,14 @@ def test_readme_example_multimodal_combination_query(self): assert r2["hits"][0]["Title"] == "Flying Plane" if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-first-multimodal-index").search, + self.warm_request(mq.index(test_index_name).search, q={ "What are some vehicles and modes of transport?": 1.0, "Aeroplanes and other things that fly": -1.0, }, searchable_attributes=["captioned_image"] ) - r3 = mq.index("my-first-multimodal-index").search( + r3 = mq.index(test_index_name).search( q={ "What are some vehicles and modes of transport?": 1.0, "Aeroplanes and other things that fly": -1.0, @@ -283,11 +286,11 @@ def test_readme_example_multimodal_combination_query(self): assert r3["hits"][0]["Title"] == "Red Bus" if self.IS_MULTI_INSTANCE: - self.warm_request(mq.index("my-first-multimodal-index").search, + self.warm_request(mq.index(test_index_name).search, q={"Animals of the Perissodactyla order": -1.0}, searchable_attributes=["captioned_image"], ) - r4 = mq.index("my-first-multimodal-index").search( + r4 = mq.index(test_index_name).search( q={"Animals of the Perissodactyla order": -1.0}, searchable_attributes=["captioned_image"], ) @@ -300,6 +303,6 @@ def test_readme_example_multimodal_combination_query(self): assert len(r3["hits"]) == 3 assert len(r4["hits"]) == 3 - rneg1 = mq.index("my-first-multimodal-index").delete() + rneg1 = mq.index(test_index_name).delete() pprint.pprint(rneg1) - assert (rneg1["acknowledged"] is True) or (rneg1["acknowledged"] == 'true') \ No newline at end of file + assert (rneg1["acknowledged"] is True) or (rneg1["acknowledged"] == 'true') diff --git a/tests/v0_tests/test_get_indexes.py b/tests/v0_tests/test_get_indexes.py index 962373b8..886d44fd 100644 --- a/tests/v0_tests/test_get_indexes.py +++ b/tests/v0_tests/test_get_indexes.py @@ -30,8 +30,10 @@ def test_get_indexes(self): ix_2 = 
self.client.get_indexes() assert self._is_index_name_in_get_indexes_response(self.test_index_name_2, ix_2) - assert len(ix_2['results']) > len(ix_1['results']) - assert len(ix_1['results']) > len(ix_0['results']) + # since indexes are not deleted after each test for cloud instances, this assert may not be correct. + if not self.client.config.is_marqo_cloud: + assert len(ix_2['results']) > len(ix_1['results']) + assert len(ix_1['results']) > len(ix_0['results']) for found_index in ix_2['results']: assert isinstance(found_index, marqo.index.Index) diff --git a/tests/v0_tests/test_get_settings.py b/tests/v0_tests/test_get_settings.py index ff8e653d..7eeb2019 100644 --- a/tests/v0_tests/test_get_settings.py +++ b/tests/v0_tests/test_get_settings.py @@ -1,6 +1,7 @@ from tests.marqo_test import MarqoTestCase from marqo.errors import IndexNotFoundError from marqo.client import Client +from pytest import mark class TestGetSettings(MarqoTestCase): @@ -13,9 +14,9 @@ def test_default_settings(self): 'image_preprocessing': {'patch_method': None}}, 'number_of_shards': 5, 'number_of_replicas' : 1,} """ - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - ix = self.client.index(self.test_index_name) + ix = self.client.index(test_index_name) index_settings = ix.get_settings() fields = {'treat_urls_and_pointers_as_images', 'text_preprocessing', 'model', 'normalize_embeddings', 'image_preprocessing'} @@ -25,6 +26,7 @@ def test_default_settings(self): self.assertIn("number_of_replicas", index_settings) self.assertTrue(fields.issubset(set(index_settings['index_defaults']))) + @mark.ignore_cloud_tests def test_custom_settings(self): """adding custom settings to the index should be reflected in the returned output """ @@ -42,9 +44,9 @@ def test_custom_settings(self): } } - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name, 
def test_settings_should_be_type_dict(self):
    """get_settings() must hand back a plain dict of index settings."""
    index_name = self.create_test_index(index_name=self.generic_test_index_name)
    retrieved_settings = self.client.index(index_name).get_settings()
    self.assertIsInstance(retrieved_settings, dict)
self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,'a') + self.warm_request(client.index(test_index_name).search,'a') - results = client.index(self.test_index_name).search('a') + results = client.index(test_index_name).search('a') print(results) assert results['hits'][0]['location'] == temp_file_name # search only the image location if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,'a', searchable_attributes=['location']) + self.warm_request(client.index(test_index_name).search,'a', searchable_attributes=['location']) - results = client.index(self.test_index_name).search('a', searchable_attributes=['location']) + results = client.index(test_index_name).search('a', searchable_attributes=['location']) print(results) assert results['hits'][0]['location'] == temp_file_name # the highlight should be the location @@ -77,7 +77,7 @@ def test_image_simple_chunking(self): "image_preprocessing_method":"simple" } - self.test_index_name = self.create_test_index(self.generic_test_index_name, **settings) + test_index_name = self.create_test_index(self.generic_test_index_name, **settings) temp_file_name = 'https://avatars.githubusercontent.com/u/13092433?v=4' @@ -88,21 +88,21 @@ def test_image_simple_chunking(self): 'description': 'the image chunking can (optionally) chunk the image into sub-patches (akin to segmenting text) by using either a learned model or simple box generation and cropping', 'location': temp_file_name} - client.index(self.test_index_name).add_documents([document1], tensor_fields=['location', 'description', 'attributes']) + client.index(test_index_name).add_documents([document1], tensor_fields=['location', 'description', 'attributes']) # test the search works if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,'a') + self.warm_request(client.index(test_index_name).search,'a') - results = client.index(self.test_index_name).search('a') + results = 
client.index(test_index_name).search('a') print(results) assert results['hits'][0]['location'] == temp_file_name # search only the image location if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,'a', searchable_attributes=['location']) + self.warm_request(client.index(test_index_name).search,'a', searchable_attributes=['location']) - results = client.index(self.test_index_name).search('a', searchable_attributes=['location']) + results = client.index(test_index_name).search('a', searchable_attributes=['location']) print(results) assert results['hits'][0]['location'] == temp_file_name # the highlight should be the location @@ -112,8 +112,8 @@ def test_image_simple_chunking(self): # search using the image itself, should return a full sized image as highlight if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,temp_file_name) + self.warm_request(client.index(test_index_name).search,temp_file_name) - results = client.index(self.test_index_name).search(temp_file_name) + results = client.index(test_index_name).search(temp_file_name) print(results) assert abs(np.array(results['hits'][0]['_highlights']['location']) - np.array([0, 0, img.size[0], img.size[1]])).sum() < 1e-6 \ No newline at end of file diff --git a/tests/v0_tests/test_index.py b/tests/v0_tests/test_index.py index 7abcdaaf..90f156f2 100644 --- a/tests/v0_tests/test_index.py +++ b/tests/v0_tests/test_index.py @@ -37,9 +37,13 @@ def test_create_index_settings_dict(self): False), ]: mock__post = mock.MagicMock() + mock_get = mock.MagicMock() + mock_get.return_value = {"index_status": "READY"} + @mock.patch("marqo._httprequests.HttpRequests.post", mock__post) + @mock.patch("marqo._httprequests.HttpRequests.get", mock_get) def run(): - self.test_index_name = self.create_test_index( + test_index_name = self.client.create_index( index_name=self.generic_test_index_name, settings_dict=settings_dict, **non_settings_dicts_param) @@ -50,7 +54,7 
@@ def run(): is expected_treat_urls_and_pointers_as_images def test_get_documents(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "Title": "Treatise on the viability of rocket cars", "Blurb": "A rocket car is a car powered by a rocket engine. " @@ -65,10 +69,10 @@ def test_get_documents(self): "distant galaxies.", "_id": "article_985" } - self.client.index(self.test_index_name).add_documents([ + self.client.index(test_index_name).add_documents([ d1, d2 ], tensor_fields=["Blurb", "Title"]) - res = self.client.index(self.test_index_name).get_documents( + res = self.client.index(test_index_name).get_documents( ["article_152", "article_490", "article_985"] ) assert len(res['results']) == 3 @@ -81,7 +85,7 @@ def test_get_documents(self): assert doc_res['_found'] def test_get_documents_expose_facets(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "Title": "Treatise on the viability of rocket cars", "Blurb": "A rocket car is a car powered by a rocket engine. 
" @@ -96,10 +100,10 @@ def test_get_documents_expose_facets(self): "distant galaxies.", "_id": "article_985" } - self.client.index(self.test_index_name).add_documents([ + self.client.index(test_index_name).add_documents([ d1, d2 ], tensor_fields=["Blurb", "Title"]) - res = self.client.index(self.test_index_name).get_documents( + res = self.client.index(test_index_name).get_documents( ["article_152", "article_490", "article_985"], expose_facets=True ) @@ -117,7 +121,7 @@ def test_get_documents_expose_facets(self): assert doc_res['_found'] def test_get_document_expose_facets(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "Title": "Treatise on the viability of rocket cars", "Blurb": "A rocket car is a car powered by a rocket engine. " @@ -125,10 +129,10 @@ def test_get_document_expose_facets(self): "future of land-based transport.", "_id": "article_152" } - self.client.index(self.test_index_name).add_documents([ + self.client.index(test_index_name).add_documents([ d1 ], tensor_fields=["Blurb", "Title"]) - doc_res = self.client.index(self.test_index_name).get_document( + doc_res = self.client.index(test_index_name).get_document( document_id="article_152", expose_facets=True ) @@ -141,9 +145,12 @@ def test_get_document_expose_facets(self): def test_create_cloud_index(self): mock__post = mock.MagicMock() + mock_get = mock.MagicMock() + mock_get.return_value = {"index_status": "READY"} test_client = copy.deepcopy(self.client) test_client.config.api_key = 'some-super-secret-API-key' @mock.patch("marqo._httprequests.HttpRequests.post", mock__post) + @mock.patch("marqo._httprequests.HttpRequests.get", mock_get) def run(): test_client.create_index(index_name=self.generic_test_index_name) args, kwargs = mock__post.call_args @@ -157,9 +164,12 @@ def run(): @mark.ignore_cloud_tests def test_create_cloud_index_non_default_param(self): 
mock__post = mock.MagicMock() + mock_get = mock.MagicMock() + mock_get.return_value = {"index_status": "READY"} test_client = copy.deepcopy(self.client) test_client.config.api_key = 'some-super-secret-API-key' @mock.patch("marqo._httprequests.HttpRequests.post", mock__post) + @mock.patch("marqo._httprequests.HttpRequests.get", mock_get) def run(): # this is overridden by a create_index() default parameter test_client.create_index( @@ -172,14 +182,17 @@ def run(): return True assert run() - @mark.ignore_cloud_tests def test_create_cloud_index_settings_dict_precedence(self): """settings_dict overrides all cloud defaults""" mock__post = mock.MagicMock() + mock_get = mock.MagicMock() + mock_get.return_value = {"index_status": "READY"} test_client = copy.deepcopy(self.client) test_client.config.api_key = 'some-super-secret-API-key' + test_client.config.is_marqo_cloud = True @mock.patch("marqo._httprequests.HttpRequests.post", mock__post) + @mock.patch("marqo._httprequests.HttpRequests.get", mock_get) def run(): # this is overridden by a create_index() default parameter test_client.create_index( @@ -198,8 +211,8 @@ def test_create_custom_number_of_replicas(self): settings = { "number_of_replicas": intended_replicas } - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name, settings_dict=settings) - index_setting = self.client.index(self.test_index_name).get_settings() + test_index_name = self.create_test_index(index_name=self.generic_test_index_name, settings_dict=settings) + index_setting = self.client.index(test_index_name).get_settings() print(index_setting) assert intended_replicas == index_setting['number_of_replicas'] @@ -299,7 +312,8 @@ def test_create_marqo_cloud_index_wrong_inference_node_count(self, mock_get, moc def test_version_check_multiple_instantiation(self): """Ensure that duplicated instantiation of the client does not result in multiple APIs calls of get_marqo()""" with mock.patch("marqo.index.Index.get_marqo") as 
mock_get_marqo, \ - mock.patch("marqo.index.Index.get_status") as mock_get_status: + mock.patch("marqo.index.Index.get_status") as mock_get_status, \ + mock.patch("marqo.marqo_cloud_instance_mappings.MarqoCloudInstanceMappings.get_index_base_url") as mock_get_base_url: mock_get_status.return_value = {'index_status': 'READY'} mock_get_marqo.return_value = {'version': '0.0.0'} index = self.client.index(self.generic_test_index_name) @@ -318,9 +332,11 @@ def test_version_check_multiple_instantiation(self): def test_skipped_version_check_multiple_instantiation(self): """Ensure that the url labelled as `_skipped` only call get_marqo() once""" with mock.patch("marqo.index.Index.get_marqo") as mock_get_marqo, \ - mock.patch("marqo.index.Index.get_status") as mock_get_status: + mock.patch("marqo.index.Index.get_status") as mock_get_status, \ + mock.patch("marqo.marqo_cloud_instance_mappings.MarqoCloudInstanceMappings.get_index_base_url") as mock_get_base_url: mock_get_status.return_value = {'index_status': 'READY'} mock_get_marqo.side_effect = requests.exceptions.RequestException("test") + mock_get_base_url.return_value = self.client_settings["url"] index = self.client.index(self.generic_test_index_name) mock_get_marqo.assert_called_once() @@ -342,8 +358,12 @@ def test_error_handling_in_version_check(self): KeyError("test"), KeyError("test"), requests.exceptions.Timeout("test")] for i, side_effect in enumerate(side_effect_list): with mock.patch("marqo.index.mq_logger.warning") as mock_warning, \ - mock.patch("marqo.index.Index.get_marqo") as mock_get_marqo: + mock.patch("marqo.index.Index.get_marqo") as mock_get_marqo, \ + mock.patch("marqo.index.Index.get_status") as mock_get_status, \ + mock.patch("marqo.marqo_cloud_instance_mappings.MarqoCloudInstanceMappings.get_index_base_url") as mock_get_base_url: mock_get_marqo.side_effect = side_effect + mock_get_status.return_value = {'index_status': 'READY'} + mock_get_base_url.return_value = self.client_settings["url"] index = 
def test_add_document_warnings_no_batching(self):
    """An unbatched add_documents call with failing docs emits exactly one
    INFO-level log line that mentions the detected errors."""
    index_name = self._create_img_index(index_name=self.generic_test_index_name)
    with self.assertLogs('marqo', level='INFO') as captured:
        self.client.index(index_name=index_name).add_documents(
            self._get_docs_to_index(),
            device="cpu",
            tensor_fields=["Title"],
        )
    assert len(captured.output) == 1
    logged = captured.output[0].lower()
    assert "errors detected" in logged
    assert "info" in logged
def test_eject_no_cached_model(self) -> None:
    """Ejecting a model that was never loaded must raise MarqoWebError."""
    settings = {"model": self.MODEL}
    # Keep index creation OUTSIDE the assertion: previously the whole body sat
    # in one try/except, so a MarqoWebError raised while creating the index
    # would have made the test pass for the wrong reason.
    test_index_name = self.create_test_index(index_name=self.generic_test_index_name, **settings)
    with self.assertRaises(MarqoWebError):
        self.client.index(test_index_name).eject_model("void_model", "void_device")
tensor_fields=["doc title", "field 1"]) - res = self.client.index(self.test_index_name).eject_model(self.MODEL, "cpu") + self.client.index(test_index_name).add_documents([d1], device="cpu", tensor_fields=["doc title", "field 1"]) + res = self.client.index(test_index_name).eject_model(self.MODEL, "cpu") assert res["result"] == "success" assert res["message"].startswith("successfully eject") diff --git a/tests/v0_tests/test_score_modifier_search.py b/tests/v0_tests/test_score_modifier_search.py index c2bf209b..31dd9623 100644 --- a/tests/v0_tests/test_score_modifier_search.py +++ b/tests/v0_tests/test_score_modifier_search.py @@ -3,6 +3,7 @@ from marqo.client import Client from marqo.errors import MarqoApiError, MarqoWebError from tests.marqo_test import MarqoTestCase +from pytest import mark class TestScoreModifierSearch(MarqoTestCase): @@ -30,15 +31,6 @@ def setUp(self) -> None: "filter"] ) self.query = "what is the rider doing?" - - def tearDown(self) -> None: - if not self.client.config.is_marqo_cloud: - try: - self.client.delete_index(self.test_index_name) - except MarqoApiError as s: - pass - else: - self.delete_documents(self.test_index_name) def search_with_score_modifier(self, score_modifiers: Optional[Dict[str, List[Dict[str, Any]]]] = None, **kwargs) -> Dict[str, Any]: return self.client.index(self.test_index_name).search( @@ -106,6 +98,7 @@ def test_valid_score_modifiers_format(self): } self.search_with_score_modifier(score_modifiers=valid_score_modifiers) +@mark.ignore_cloud_tests class TestScoreModifierBulkSearch(TestScoreModifierSearch): def map_search_kwargs(self, k: str) -> str: diff --git a/tests/v0_tests/test_sentence_chunking.py b/tests/v0_tests/test_sentence_chunking.py index bc297913..9a103757 100644 --- a/tests/v0_tests/test_sentence_chunking.py +++ b/tests/v0_tests/test_sentence_chunking.py @@ -24,7 +24,7 @@ def test_sentence_no_chunking(self): "sentence_overlap":0 } - self.test_index_name = self.create_test_index(self.generic_test_index_name, 
**settings) + test_index_name = self.create_test_index(self.generic_test_index_name, **settings) document1 = {'_id': '1', # '_id' can be provided but is not required @@ -32,13 +32,13 @@ def test_sentence_no_chunking(self): 'description': 'the image chunking. can (optionally) chunk. the image into sub-patches (aking to segmenting text). by using either. a learned model. or simple box generation and cropping.', 'misc':'sasasasaifjfnonfqeno asadsdljknjdfln'} - client.index(self.test_index_name).add_documents([document1], tensor_fields=['attributes', 'description', 'misc']) + client.index(test_index_name).add_documents([document1], tensor_fields=['attributes', 'description', 'misc']) # test the search works if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,'a') + self.warm_request(client.index(test_index_name).search,'a') - results = client.index(self.test_index_name).search('a') + results = client.index(test_index_name).search('a') print(results) assert results['hits'][0]['attributes'] == document1['attributes'] @@ -55,7 +55,7 @@ def test_sentence_chunking_no_overlap(self): "sentence_overlap":0 } - self.test_index_name = self.create_test_index(self.generic_test_index_name, **settings) + test_index_name = self.create_test_index(self.generic_test_index_name, **settings) document1 = {'_id': '1', # '_id' can be provided but is not required @@ -63,15 +63,15 @@ def test_sentence_chunking_no_overlap(self): 'description': 'the image chunking. can (optionally) chunk. the image into sub-patches (aking to segmenting text). by using either. a learned model. 
or simple box generation and cropping.', 'misc':'sasasasaifjfnonfqeno asadsdljknjdfln'} - client.index(self.test_index_name).add_documents([document1], tensor_fields=['attributes', 'description', 'misc']) + client.index(test_index_name).add_documents([document1], tensor_fields=['attributes', 'description', 'misc']) # search with a term we know is an exact chunk and will then show in the highlights search_term = 'hello. how are you.' if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['attributes'] == search_term @@ -79,9 +79,9 @@ def test_sentence_chunking_no_overlap(self): search_term = 'the image into sub-patches (aking to segmenting text). by using either.' if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['description'] == search_term @@ -89,9 +89,9 @@ def test_sentence_chunking_no_overlap(self): search_term = 'sasasasaifjfnonfqeno asadsdljknjdfln' if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['misc'] == search_term @@ -99,9 +99,9 @@ def test_sentence_chunking_no_overlap(self): search_term = 'can (optionally) chunk.' 
if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['description'] == 'the image chunking. can (optionally) chunk.' @@ -114,7 +114,7 @@ def test_sentence_chunking_overlap(self): "sentence_overlap":1 } - self.test_index_name = self.create_test_index(self.generic_test_index_name, **settings) + test_index_name = self.create_test_index(self.generic_test_index_name, **settings) document1 = {'_id': '1', # '_id' can be provided but is not required @@ -122,15 +122,15 @@ def test_sentence_chunking_overlap(self): 'description': 'the image chunking. can (optionally) chunk. the image into sub-patches (aking to segmenting text). by using either. a learned model. or simple box generation and cropping.', 'misc':'sasasasaifjfnonfqeno asadsdljknjdfln'} - client.index(self.test_index_name).add_documents([document1], tensor_fields=['attributes', 'description', 'misc']) + client.index(test_index_name).add_documents([document1], tensor_fields=['attributes', 'description', 'misc']) # search with a term we know is an exact chunk and will then show in the highlights search_term = 'hello. how are you.' if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['attributes'] == search_term @@ -138,9 +138,9 @@ def test_sentence_chunking_overlap(self): search_term = 'the image into sub-patches (aking to segmenting text). by using either.' 
if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['description'] == search_term @@ -148,9 +148,9 @@ def test_sentence_chunking_overlap(self): search_term = 'sasasasaifjfnonfqeno asadsdljknjdfln' if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['misc'] == search_term @@ -158,9 +158,9 @@ def test_sentence_chunking_overlap(self): search_term = 'can (optionally) chunk.' if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['description'] == 'the image chunking. can (optionally) chunk.' @@ -168,17 +168,17 @@ def test_sentence_chunking_overlap(self): search_term = "can (optionally) chunk. the image into sub-patches (aking to segmenting text)." 
if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['description'] == search_term search_term = "the image into sub-patches (aking to segmenting text). by using either." if self.IS_MULTI_INSTANCE: - self.warm_request(client.index(self.test_index_name).search,search_term) + self.warm_request(client.index(test_index_name).search,search_term) - results = client.index(self.test_index_name).search(search_term) + results = client.index(test_index_name).search(search_term) print(results) assert results['hits'][0]['_highlights']['description'] == search_term diff --git a/tests/v0_tests/test_telemetry.py b/tests/v0_tests/test_telemetry.py index 501be766..0ab1bb35 100644 --- a/tests/v0_tests/test_telemetry.py +++ b/tests/v0_tests/test_telemetry.py @@ -14,7 +14,7 @@ def setUp(self) -> None: def test_telemetry_add_documents(self): number_of_docs = 10 - self.test_index_name = self.create_test_index(self.generic_test_index_name) + test_index_name = self.create_test_index(self.generic_test_index_name) doc = [{"Title": "Marqo is useful", "Description": "Marqo is a very useful tool"}, ] * number_of_docs @@ -27,10 +27,10 @@ def test_telemetry_add_documents(self): ] if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).add_documents, **kwargs_list[0]) + self.warm_request(self.client.index(test_index_name).add_documents, **kwargs_list[0]) for kwargs in kwargs_list: - res = self.client.index(self.test_index_name).add_documents(**kwargs) + res = self.client.index(test_index_name).add_documents(**kwargs) if kwargs["client_batch_size"] is not None: assert len(res) == math.ceil(float(number_of_docs) / kwargs["client_batch_size"]) assert all(["telemetry" in i 
for i in res]) @@ -47,24 +47,24 @@ def test_telemetry_search(self): {"q": "search query","search_method": "LEXICAL"}, {"q": "search query","searchable_attributes": ["Description"]}] - self.test_index_name = self.create_test_index(self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents([{"Title": "A dummy document",}], tensor_fields=["Title"]) + test_index_name = self.create_test_index(self.generic_test_index_name) + self.client.index(test_index_name).add_documents([{"Title": "A dummy document",}], tensor_fields=["Title"]) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, **search_kwargs_list[0]) + self.warm_request(self.client.index(test_index_name).search, **search_kwargs_list[0]) for kwargs in search_kwargs_list: - res = self.client.index(self.test_index_name).search(**kwargs) + res = self.client.index(test_index_name).search(**kwargs) self.assertIn("telemetry", res) self.assertIn("timesMs", res["telemetry"]) @mark.ignore_cloud_tests def test_telemetry_bulk_search(self): - self.test_index_name = self.create_test_index(self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents([{"Title": "A dummy document",}], tensor_fields=["Title"]) + test_index_name = self.create_test_index(self.generic_test_index_name) + self.client.index(test_index_name).add_documents([{"Title": "A dummy document",}], tensor_fields=["Title"]) bulk_search_query = [ { - "index": self.test_index_name, + "index": test_index_name, "q": "what is the best outfit to wear on the moon?", "searchableAttributes": ["Description"], "limit": 10, @@ -75,7 +75,7 @@ def test_telemetry_bulk_search(self): "attributesToRetrieve": ["Title", "Description"] }, { - "index": self.test_index_name, + "index": test_index_name, "attributesToRetrieve": ["_id"], "q": {"what is the best outfit to wear on mars?": 0.5, "what is the worst outfit to wear on mars?": 0.3} }] @@ -94,18 +94,18 @@ def 
test_telemetry_bulk_search(self): assert all([field in res["telemetry"]["timesMs"] for field in expected_fields]) def test_telemetry_get_document(self): - self.test_index_name = self.create_test_index(self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents([{"_id": "123321", "Title": "Marqo is useful",}], + test_index_name = self.create_test_index(self.generic_test_index_name) + self.client.index(test_index_name).add_documents([{"_id": "123321", "Title": "Marqo is useful",}], tensor_fields=["Title"]) - res = self.client.index(self.test_index_name).get_document("123321") + res = self.client.index(test_index_name).get_document("123321") self.assertIn("telemetry", res) self.assertEqual(res["telemetry"], dict()) def test_delete_documents(self): - self.test_index_name = self.create_test_index(self.generic_test_index_name) - self.client.index(self.test_index_name).add_documents([{"_id": "123321", "Title": "Marqo is useful",}], + test_index_name = self.create_test_index(self.generic_test_index_name) + self.client.index(test_index_name).add_documents([{"_id": "123321", "Title": "Marqo is useful",}], tensor_fields=["Title"]) - res = self.client.index(self.test_index_name).delete_documents(["123321"]) + res = self.client.index(test_index_name).delete_documents(["123321"]) self.assertIn("telemetry", res) self.assertEqual(res["telemetry"], dict()) diff --git a/tests/v0_tests/test_tensor_search.py b/tests/v0_tests/test_tensor_search.py index 1383be23..fa0dc8db 100644 --- a/tests/v0_tests/test_tensor_search.py +++ b/tests/v0_tests/test_tensor_search.py @@ -31,20 +31,20 @@ def strip_marqo_fields(doc, strip_id=True): def test_search_single(self): """Searches an index of a single doc. 
Checks the basic functionality and response structure""" - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "Title": "This is a title about some doc. ", "Description": """The Guardian is a British daily newspaper. It was founded in 1821 as The Manchester Guardian, and changed its name in 1959.[5] Along with its sister papers The Observer and The Guardian Weekly, The Guardian is part of the Guardian Media Group, owned by the Scott Trust.[6] The trust was created in 1936 to "secure the financial and editorial independence of The Guardian in perpetuity and to safeguard the journalistic freedom and liberal values of The Guardian free from commercial or political interference".[7] The trust was converted into a limited company in 2008, with a constitution written so as to maintain for The Guardian the same protections as were built into the structure of the Scott Trust by its creators. Profits are reinvested in journalism rather than distributed to owners or shareholders.[7] It is considered a newspaper of record in the UK.[8][9] The editor-in-chief Katharine Viner succeeded Alan Rusbridger in 2015.[10][11] Since 2018, the paper's main newsprint sections have been published in tabloid format. As of July 2021, its print edition had a daily circulation of 105,134.[4] The newspaper has an online edition, TheGuardian.com, as well as two international websites, Guardian Australia (founded in 2013) and Guardian US (founded in 2011). 
The paper's readership is generally on the mainstream left of British political opinion,[12][13][14][15] and the term "Guardian reader" is used to imply a stereotype of liberal, left-wing or "politically correct" views.[3] Frequent typographical errors during the age of manual typesetting led Private Eye magazine to dub the paper the "Grauniad" in the 1960s, a nickname still used occasionally by the editors for self-mockery.[16] """ } - add_doc_res = self.client.index(self.test_index_name).add_documents([d1], tensor_fields=["Title", "Description"]) + add_doc_res = self.client.index(test_index_name).add_documents([d1], tensor_fields=["Title", "Description"]) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "title about some doc") - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( "title about some doc") assert len(search_res["hits"]) == 1 assert self.strip_marqo_fields(search_res["hits"][0]) == d1 @@ -52,20 +52,20 @@ def test_search_single(self): assert ("Title" in search_res["hits"][0]["_highlights"]) or ("Description" in search_res["hits"][0]["_highlights"]) def test_search_empty_index(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "title about some doc") - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( "title about some doc") assert len(search_res["hits"]) == 0 def test_search_highlights(self): """Tests if show_highlights works and if the deprecation behaviour is expected""" - self.test_index_name = 
self.create_test_index(index_name=self.generic_test_index_name) - self.client.index(index_name=self.test_index_name).add_documents([{"f1": "some doc"}], tensor_fields=["f1"]) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + self.client.index(index_name=test_index_name).add_documents([{"f1": "some doc"}], tensor_fields=["f1"]) for params, expected_highlights_presence in [ ({"highlights": True, "show_highlights": False}, False), ({"highlights": False, "show_highlights": True}, False), @@ -78,15 +78,15 @@ def test_search_highlights(self): ]: if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "title about some doc", **params) - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( "title about some doc", **params) assert ("_highlights" in search_res["hits"][0]) is expected_highlights_presence def test_search_multi(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "doc title": "Cool Document 1", "field 1": "some extra info", @@ -97,21 +97,21 @@ def test_search_multi(self): "field X": "this is a solid doc", "_id": "123456" } - res = self.client.index(self.test_index_name).add_documents([ + res = self.client.index(test_index_name).add_documents([ d1, d2 ], tensor_fields=["doc title", "field X"]) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "this is a solid doc") - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( "this is a solid doc") assert d2 == self.strip_marqo_fields(search_res['hits'][0], strip_id=False) assert 
search_res['hits'][0]['_highlights']["field X"] == "this is a solid doc" def test_select_lexical(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "doc title": "Very heavy, dense metallic lead.", "field 1": "some extra info", @@ -123,17 +123,17 @@ def test_select_lexical(self): "field X": "this is a solid doc", "_id": "123456" } - res = self.client.index(self.test_index_name).add_documents([ + res = self.client.index(test_index_name).add_documents([ d1, d2 ], tensor_fields=['doc title', 'field X']) # Ensure that vector search works if self.IS_MULTI_INSTANCE: time.sleep(5) - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, "Examples of leadership", search_method=enums.SearchMethods.TENSOR) - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( "Examples of leadership", search_method=enums.SearchMethods.TENSOR) assert d2 == self.strip_marqo_fields(search_res["hits"][0], strip_id=False) assert search_res["hits"][0]['_highlights']["doc title"].startswith("The captain bravely lead her followers") @@ -141,18 +141,18 @@ def test_select_lexical(self): # try it with lexical search: # can't find the above with synonym if self.IS_MULTI_INSTANCE: - self.client.index(self.test_index_name).search( + self.client.index(test_index_name).search( "Examples of leadership", search_method=marqo.SearchMethods.LEXICAL) - assert len(self.client.index(self.test_index_name).search( + assert len(self.client.index(test_index_name).search( "Examples of leadership", search_method=marqo.SearchMethods.LEXICAL)["hits"]) == 0 # but can look for a word if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, '"captain"') - 
assert self.client.index(self.test_index_name).search( + assert self.client.index(test_index_name).search( '"captain"')["hits"][0]["_id"] == "123456" def test_search_with_device(self): @@ -182,7 +182,7 @@ def run(): assert "device" not in kwargs0["path"] def test_filter_string_and_searchable_attributes(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) docs = [ { "_id": "0", # content in field_a @@ -211,7 +211,7 @@ def test_filter_string_and_searchable_attributes(self): "int_for_filtering": 1, } ] - res = self.client.index(self.test_index_name).add_documents(docs,auto_refresh=True, tensor_fields=["field_a", "field_b"]) + res = self.client.index(test_index_name).add_documents(docs,auto_refresh=True, tensor_fields=["field_a", "field_b"]) test_cases = ( { # filter string only (str) @@ -260,13 +260,13 @@ def test_filter_string_and_searchable_attributes(self): for case in test_cases: if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, case["query"], filter_string=case.get("filter_string", ""), searchable_attributes=case.get("searchable_attributes", None) ) - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( case["query"], filter_string=case.get("filter_string", ""), searchable_attributes=case.get("searchable_attributes", None) @@ -276,7 +276,7 @@ def test_filter_string_and_searchable_attributes(self): def test_filter_on_nested_docs(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) docs = [ { "_id": "filter_in_tag", @@ -316,7 +316,7 @@ def test_filter_on_nested_docs(self): } } } - self.client.index(self.test_index_name).add_documents(docs, 
mappings=mappings_object, auto_refresh=True, tensor_fields=["content", "combined_text_field"]) + self.client.index(test_index_name).add_documents(docs, mappings=mappings_object, auto_refresh=True, tensor_fields=["content", "combined_text_field"]) test_cases = ( { # Test where only "tag" field contains "TO_FILTER" @@ -364,7 +364,7 @@ def test_filter_on_nested_docs(self): for case in test_cases[0:3]: print(f"THE CASE IS: {case}") - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( case["query"], filter_string=case.get("filter_string", ""), ) @@ -372,7 +372,7 @@ def test_filter_on_nested_docs(self): assert set([hit["_id"] for hit in search_res["hits"]]) == set(case["expected"]) def test_attributes_to_retrieve(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) d1 = { "doc title": "Very heavy, dense metallic lead.", "abc-123": "some text blah", @@ -387,7 +387,7 @@ def test_attributes_to_retrieve(self): "an_int": 2345678, "_id": "123456" } - x = self.client.index(self.test_index_name).add_documents([ + x = self.client.index(test_index_name).add_documents([ d1, d2 ], tensor_fields=['doc title', 'field X', 'field1', 'abc-123', 'an_int'], auto_refresh=True) atts = ["doc title", "an_int"] @@ -395,12 +395,12 @@ def test_attributes_to_retrieve(self): enums.SearchMethods.LEXICAL]: if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, q="blah blah", attributes_to_retrieve=atts, search_method=search_method ) - search_res = self.client.index(self.test_index_name).search( + search_res = self.client.index(test_index_name).search( q="blah blah", attributes_to_retrieve=atts, search_method=search_method ) @@ -410,7 +410,7 @@ def test_attributes_to_retrieve(self): def 
test_pagination_single_field(self): - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) # 100 random words vocab_source = "https://www.mit.edu/~ecprice/wordlist.10000" @@ -421,15 +421,15 @@ def test_pagination_single_field(self): } for i in range(num_docs)] - self.client.index(index_name=self.test_index_name).add_documents( + self.client.index(index_name=test_index_name).add_documents( docs, tensor_fields=["Title"], auto_refresh=False, client_batch_size=50 ) - self.client.index(index_name=self.test_index_name).refresh() + self.client.index(index_name=test_index_name).refresh() for search_method in (enums.SearchMethods.TENSOR, enums.SearchMethods.LEXICAL): for doc_count in [100]: # Query full results - full_search_results = self.client.index(self.test_index_name).search( + full_search_results = self.client.index(test_index_name).search( search_method=search_method, q='a', limit=doc_count) @@ -442,12 +442,12 @@ def test_pagination_single_field(self): off = page_num * page_size if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + self.warm_request(self.client.index(test_index_name).search, search_method=search_method, q='a', limit=lim, offset=off) - page_res = self.client.index(self.test_index_name).search( + page_res = self.client.index(test_index_name).search( search_method=search_method, q='a', limit=lim, offset=off) @@ -474,8 +474,8 @@ def test_multi_queries(self): 'treat_urls_and_pointers_as_images': True } } - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name, settings_dict=image_index_config) - self.client.index(index_name=self.test_index_name).add_documents( + test_index_name = self.create_test_index(index_name=self.generic_test_index_name, settings_dict=image_index_config) + self.client.index(index_name=test_index_name).add_documents( documents=docs, 
tensor_fields=['loc a', 'loc b'], auto_refresh=True ) queries_expected_ordering = [ @@ -488,7 +488,7 @@ def test_multi_queries(self): ['artefact_hippo', 'realistic_hippo']), ] for query, expected_ordering in queries_expected_ordering: - res = self.client.index(index_name=self.test_index_name).search( + res = self.client.index(index_name=test_index_name).search( q=query, search_method="TENSOR") print(res) @@ -505,13 +505,13 @@ def test_escaped_non_tensor_field(self): "dont#tensorise Me": "Dog", "tensorise_me": "quarterly earnings report" }] - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - self.client.index(index_name=self.test_index_name).add_documents( + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + self.client.index(index_name=test_index_name).add_documents( docs, auto_refresh=True, non_tensor_fields=["dont#tensorise Me"] ) if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, q='Blah') - search_res = self.client.index(index_name=self.test_index_name).search("Dog") + self.warm_request(self.client.index(test_index_name).search, q='Blah') + search_res = self.client.index(index_name=test_index_name).search("Dog") assert list(search_res['hits'][0]['_highlights'].keys()) == ['tensorise_me'] def test_special_characters(self): @@ -532,21 +532,21 @@ def test_special_characters(self): filter_field: "Alpaca" } ] - self.test_index_name = self.create_test_index(index_name=self.generic_test_index_name) - self.client.index(index_name=self.test_index_name).add_documents( + test_index_name = self.create_test_index(index_name=self.generic_test_index_name) + self.client.index(index_name=test_index_name).add_documents( docs, auto_refresh=True, non_tensor_fields=[field_to_not_search] ) search_filter_field = f"filter{filter_char}me" if self.IS_MULTI_INSTANCE: - self.warm_request(self.client.index(self.test_index_name).search, + 
self.warm_request(self.client.index(test_index_name).search, q="Dog", searchable_attributes=[field_to_search, field_to_not_search], attributes_to_retrieve=[field_to_not_search], filter_string=f'{search_filter_field}:Walrus' ) - search1_res = self.client.index(index_name=self.test_index_name).search( + search1_res = self.client.index(index_name=test_index_name).search( "Dog", searchable_attributes=[field_to_search, field_to_not_search], attributes_to_retrieve=[field_to_not_search], filter_string=f'{search_filter_field}:Walrus' diff --git a/tox.ini b/tox.ini index 22ec84b0..af69e970 100644 --- a/tox.ini +++ b/tox.ini @@ -27,5 +27,4 @@ deps = pillow numpy commands = - pytest {posargs} -m "not ignore_cloud_tests" - python tests/scripts/delete_all_indexes.py \ No newline at end of file + python tests/scripts/run_cloud_tests.py \ No newline at end of file