From 5e9f539f5de93f2aa576e3e2ce348d9fa42869e7 Mon Sep 17 00:00:00 2001 From: Raynor Chavez Date: Wed, 4 Sep 2024 22:24:04 +0900 Subject: [PATCH] debugged client and added tests --- src/marqo/client.py | 4 + src/marqo/index.py | 6 ++ src/marqo/models/create_index_settings.py | 2 + src/marqo/models/marqo_index.py | 10 +++ tests/marqo_test.py | 22 ++++++ tests/v2_tests/test_create_index.py | 95 +++++++++++++++++++++++ tests/v2_tests/test_embed.py | 57 ++++++++++++++ 7 files changed, 196 insertions(+) diff --git a/src/marqo/client.py b/src/marqo/client.py index 4fd26ca5..1b97fda0 100644 --- a/src/marqo/client.py +++ b/src/marqo/client.py @@ -78,6 +78,8 @@ def create_index( normalize_embeddings: Optional[bool] = None, text_preprocessing: Optional[marqo_index.TextPreProcessing] = None, image_preprocessing: Optional[marqo_index.ImagePreProcessing] = None, + audio_preprocessing: Optional[marqo_index.AudioPreProcessing] = None, + video_preprocessing: Optional[marqo_index.VideoPreProcessing] = None, vector_numeric_type: Optional[marqo_index.VectorNumericType] = None, ann_parameters: Optional[marqo_index.AnnParameters] = None, wait_for_readiness: bool = True, @@ -141,6 +143,8 @@ def create_index( normalize_embeddings=normalize_embeddings, text_preprocessing=text_preprocessing, image_preprocessing=image_preprocessing, + audio_preprocessing=audio_preprocessing, + video_preprocessing=video_preprocessing, vector_numeric_type=vector_numeric_type, ann_parameters=ann_parameters, wait_for_readiness=wait_for_readiness, diff --git a/src/marqo/index.py b/src/marqo/index.py index 2f00281a..7f28b7e8 100644 --- a/src/marqo/index.py +++ b/src/marqo/index.py @@ -89,6 +89,8 @@ def create(config: Config, normalize_embeddings: Optional[bool] = None, text_preprocessing: Optional[marqo_index.TextPreProcessing] = None, image_preprocessing: Optional[marqo_index.ImagePreProcessing] = None, + audio_preprocessing: Optional[marqo_index.AudioPreProcessing] = None, + video_preprocessing: 
Optional[marqo_index.VideoPreProcessing] = None, vector_numeric_type: Optional[marqo_index.VectorNumericType] = None, ann_parameters: Optional[marqo_index.AnnParameters] = None, inference_type: Optional[str] = None, @@ -158,6 +160,8 @@ def create(config: Config, normalizeEmbeddings=normalize_embeddings, textPreprocessing=text_preprocessing, imagePreprocessing=image_preprocessing, + audioPreprocessing=audio_preprocessing, + videoPreprocessing=video_preprocessing, vectorNumericType=vector_numeric_type, annParameters=ann_parameters, textChunkPrefix=text_chunk_prefix, @@ -181,6 +185,8 @@ def create(config: Config, normalizeEmbeddings=normalize_embeddings, textPreprocessing=text_preprocessing, imagePreprocessing=image_preprocessing, + audioPreprocessing=audio_preprocessing, + videoPreprocessing=video_preprocessing, vectorNumericType=vector_numeric_type, annParameters=ann_parameters, numberOfInferences=number_of_inferences, diff --git a/src/marqo/models/create_index_settings.py b/src/marqo/models/create_index_settings.py index 0823f32f..a9fac9bb 100644 --- a/src/marqo/models/create_index_settings.py +++ b/src/marqo/models/create_index_settings.py @@ -43,6 +43,8 @@ class IndexSettings(MarqoBaseModel): normalizeEmbeddings: Optional[bool] = None textPreprocessing: Optional[marqo_index.TextPreProcessing] = None imagePreprocessing: Optional[marqo_index.ImagePreProcessing] = None + audioPreprocessing: Optional[marqo_index.AudioPreProcessing] = None + videoPreprocessing: Optional[marqo_index.VideoPreProcessing] = None vectorNumericType: Optional[marqo_index.VectorNumericType] = None annParameters: Optional[marqo_index.AnnParameters] = None textQueryPrefix: Optional[str] = None diff --git a/src/marqo/models/marqo_index.py b/src/marqo/models/marqo_index.py index f07c3ba7..8605e214 100644 --- a/src/marqo/models/marqo_index.py +++ b/src/marqo/models/marqo_index.py @@ -23,6 +23,8 @@ class FieldType(str, Enum): ArrayFloat = 'array' ArrayDouble = 'array' ImagePointer = 'image_pointer' 
+ VideoPointer = 'video_pointer' + AudioPointer = 'audio_pointer' MultimodalCombination = 'multimodal_combination' CustomVector = "custom_vector" MapInt = 'map' @@ -77,6 +79,14 @@ class TextPreProcessing(StrictBaseModel): class ImagePreProcessing(StrictBaseModel): patchMethod: Optional[PatchMethod] = Field(None, alias="patch_method") +class VideoPreProcessing(StrictBaseModel): + splitLength: Optional[int] = Field(None, alias="split_length") + splitOverlap: Optional[int] = Field(None, alias="split_overlap") + +class AudioPreProcessing(StrictBaseModel): + splitLength: Optional[int] = Field(None, alias="split_length") + splitOverlap: Optional[int] = Field(None, alias="split_overlap") + class Model(StrictBaseModel): name: Optional[str] = None diff --git a/tests/marqo_test.py b/tests/marqo_test.py index 45a76b6f..00a83702 100644 --- a/tests/marqo_test.py +++ b/tests/marqo_test.py @@ -209,6 +209,9 @@ def setUpClass(cls) -> None: cls.unstructured_no_model_index_name = "unstructured_no_model_index" cls.structured_image_index_name_simple_preprocessing_method = \ "structured_image_index_simple_preprocessing_method" + cls.unstructured_languagebind_index_name = "unstructured_languagebind_index" + cls.structured_languagebind_index_name = "structured_languagebind_index" + # TODO: include structured when boolean_field bug for structured is fixed cls.test_cases = [ (CloudTestIndex.unstructured_image, cls.unstructured_index_name), @@ -262,6 +265,25 @@ def setUpClass(cls) -> None: "type": "no_model", "dimensions": 512 } + }, + { + "indexName": cls.unstructured_languagebind_index_name, + "type": "unstructured", + "model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image", + "treatUrlsAndPointersAsMedia": True, + "treatUrlsAndPointersAsImages": True + }, + { + "indexName": cls.structured_languagebind_index_name, + "type": "structured", + "model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image", + "allFields": [ + {"name": "text_field", "type": "text"}, + {"name": "video_field", "type": 
"video_pointer"}, + {"name": "audio_field", "type": "audio_pointer"}, + {"name": "image_field", "type": "image_pointer"} + ], + "tensorFields": ["text_field", "video_field", "audio_field", "image_field"] } ]) except Exception as e: diff --git a/tests/v2_tests/test_create_index.py b/tests/v2_tests/test_create_index.py index 99c190bd..594c1422 100644 --- a/tests/v2_tests/test_create_index.py +++ b/tests/v2_tests/test_create_index.py @@ -311,3 +311,98 @@ def test_dash_and_underscore_in_index_name(self): self.assertEqual(1, len(res['hits'])) self.client.delete_index("test-dash-and-under-score") self.client.delete_index("test_dash_and_under_score") + + def test_create_unstructured_index_with_languagebind(self): + self.client.create_index( + index_name=self.index_name, + type="unstructured", + model="LanguageBind/Video_V1.5_FT_Audio_FT_Image", + treat_urls_and_pointers_as_media=True, + treat_urls_and_pointers_as_images=True + ) + + index_settings = self.client.index(self.index_name).get_settings() + + expected_settings = { + "type": "unstructured", + "model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image", + "normalizeEmbeddings": True, + "treatUrlsAndPointersAsMedia": True, + "treatUrlsAndPointersAsImages": True, + "vectorNumericType": "float" + } + + for key, value in expected_settings.items(): + self.assertEqual(value, index_settings[key]) + + # Test adding and searching documents + ix = self.client.index(self.index_name) + + res = ix.add_documents( + documents = [ + {"audio_field": "https://audio-previews.elements.envatousercontent.com/files/187680354/preview.mp3", "_id": "corporate"}, + {"audio_field": "https://audio-previews.elements.envatousercontent.com/files/492763015/preview.mp3", "_id": "lofi"}, + ], + tensor_fields=["audio_field"] + ) + + doc = ix.search( + q="corporate video background music", + limit=5 + ) + + self.assertEqual(2, len(doc['hits'])) + self.assertEqual("corporate", doc['hits'][0]['_id']) + self.assertEqual("lofi", doc['hits'][1]['_id']) + + def 
test_create_structured_index_with_languagebind(self): + self.client.create_index( + index_name=self.index_name, + type="structured", + model="LanguageBind/Video_V1.5_FT_Audio_FT_Image", + all_fields=[ + {"name": "text_field", "type": "text"}, + {"name": "video_field", "type": "video_pointer"}, + {"name": "audio_field", "type": "audio_pointer"}, + {"name": "image_field", "type": "image_pointer"} + ], + tensor_fields=["text_field", "video_field", "audio_field", "image_field"] + ) + + index_settings = self.client.index(self.index_name).get_settings() + + expected_settings = { + "type": "structured", + "model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image", + "normalizeEmbeddings": True, + "vectorNumericType": "float", + "tensorFields": ["text_field", "video_field", "audio_field", "image_field"], + "allFields": [ + {"features": [], "name": "text_field", "type": "text"}, + {"features": [], "name": "video_field", "type": "video_pointer"}, + {"features": [], "name": "audio_field", "type": "audio_pointer"}, + {"features": [], "name": "image_field", "type": "image_pointer"}, + ] + } + + for key, value in expected_settings.items(): + self.assertEqual(value, index_settings[key]) + + # Test adding and searching documents + ix = self.client.index(self.index_name) + + res = ix.add_documents( + documents = [ + {"audio_field": "https://audio-previews.elements.envatousercontent.com/files/187680354/preview.mp3", "_id": "corporate"}, + {"audio_field": "https://audio-previews.elements.envatousercontent.com/files/492763015/preview.mp3", "_id": "lofi"}, + ], + ) + + doc = ix.search( + q="corporate video background music", + limit=5 + ) + + self.assertEqual(2, len(doc['hits'])) + self.assertEqual("corporate", doc['hits'][0]['_id']) + self.assertEqual("lofi", doc['hits'][1]['_id']) \ No newline at end of file diff --git a/tests/v2_tests/test_embed.py b/tests/v2_tests/test_embed.py index 0350c69e..1defacd0 100644 --- a/tests/v2_tests/test_embed.py +++ b/tests/v2_tests/test_embed.py @@ -183,3 
+183,60 @@ def test_embed_non_numeric_weight_fails(self): self.client.index(test_index_name).embed(content={"text to embed": "not a number"}) self.assertIn("not a valid float", str(e.exception)) + + def test_embed_images_with_languagebind(self): + """Embeds multiple images using LanguageBind model.""" + test_index_name = self.unstructured_languagebind_index_name + + image_urls = [ + "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png", + "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png", + "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png" + ] + + embed_res = self.client.index(test_index_name).embed(content=image_urls) + + self.assertIn("processingTimeMs", embed_res) + self.assertEqual(embed_res["content"], image_urls) + self.assertEqual(len(embed_res["embeddings"]), 3) + + # Check that each embedding is non-empty and not all zeros + for embedding in embed_res["embeddings"]: + self.assertGreater(len(embedding), 0) + self.assertTrue(any(abs(x) > 1e-6 for x in embedding)) + + # Check that the first five components of each embedding are close to the expected values + expected_embedding = [0.019889963790774345, -0.01263524405658245, + 0.026028314605355263, 0.005291664972901344, -0.013181567192077637] + for embedding in embed_res["embeddings"]: + for i, value in enumerate(expected_embedding): + self.assertAlmostEqual(embedding[i], value, places=5) + + + def test_embed_videos_with_languagebind(self): + """Embeds multiple videos using LanguageBind model.""" + test_index_name = self.structured_languagebind_index_name + + video_urls = [ + "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", + "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", + "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4" + ] + + embed_res = 
self.client.index(test_index_name).embed(content=video_urls) + + self.assertIn("processingTimeMs", embed_res) + self.assertEqual(embed_res["content"], video_urls) + self.assertEqual(len(embed_res["embeddings"]), 3) + + # Check that each embedding is non-empty and not all zeros + for embedding in embed_res["embeddings"]: + self.assertGreater(len(embedding), 0) + self.assertTrue(any(abs(x) > 1e-6 for x in embedding)) + + # Check that the first five components of each embedding are close to the expected values + expected_embedding = [0.0394694060087204, 0.049264926463365555, + -0.014714145101606846, 0.05715121701359749, -0.019508328288793564] + for embedding in embed_res["embeddings"]: + for i, value in enumerate(expected_embedding): + self.assertAlmostEqual(embedding[i], value, places=5)