Merge branch 'mainline' into joshua/2.11.4-client-2

marqo-ai · Sep 16, 2024 · f533c0b · f533c0b
2 parents 0cbc50e + 8035382
commit f533c0b
Show file tree

Hide file tree

Showing 12 changed files with 320 additions and 9 deletions.
diff --git a/.github/workflows/open-source-unit-tests.yml b/.github/workflows/open-source-unit-tests.yml
@@ -116,6 +116,8 @@ jobs:
           docker pull ${{ steps.prepare.outputs.registry }}/${{ steps.prepare.outputs.image_repo }}:${{ steps.prepare.outputs.image_tag }}
           docker run --name marqo -d --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway \
             -e MARQO_ENABLE_BATCH_APIS=True \
+            -e MARQO_MAX_CUDA_MODEL_MEMORY=15 \
+            -e MARQO_MAX_CPU_MODEL_MEMORY=15 \
             ${{ steps.prepare.outputs.registry }}/${{ steps.prepare.outputs.image_repo }}:${{ steps.prepare.outputs.image_tag }}
                     
           # wait for marqo to start with timeout of 10 minutes

diff --git a/setup.py b/setup.py
@@ -18,7 +18,7 @@
         "tox"
     ],
     name="marqo",
-    version="3.7.0",
+    version="3.8.0",
     author="marqo org",
     author_email="org@marqo.io",
     description="Tensor search for humans",

diff --git a/src/marqo/client.py b/src/marqo/client.py
@@ -69,6 +69,7 @@ def create_index(
         type: Optional[marqo_index.IndexType] = None,
         settings_dict: Optional[Dict[str, Any]] = None,
         treat_urls_and_pointers_as_images: Optional[bool] = None,
+        treat_urls_and_pointers_as_media: Optional[bool] = None,
         filter_string_max_length: Optional[int] = None,
         all_fields: Optional[List[marqo_index.FieldRequest]] = None,
         tensor_fields: Optional[List[str]] = None,
@@ -77,6 +78,8 @@ def create_index(
         normalize_embeddings: Optional[bool] = None,
         text_preprocessing: Optional[marqo_index.TextPreProcessing] = None,
         image_preprocessing: Optional[marqo_index.ImagePreProcessing] = None,
+        audio_preprocessing: Optional[marqo_index.AudioPreProcessing] = None,
+        video_preprocessing: Optional[marqo_index.VideoPreProcessing] = None,
         vector_numeric_type: Optional[marqo_index.VectorNumericType] = None,
         ann_parameters: Optional[marqo_index.AnnParameters] = None,
         wait_for_readiness: bool = True,
@@ -100,6 +103,7 @@ def create_index(
                 parameters, and is passed directly as the index's
                 index_settings
             treat_urls_and_pointers_as_images: whether to treat urls and pointers as images
+            treat_urls_and_pointers_as_media: whether to treat urls and pointers as media (video/audio)
             filter_string_max_length: threshold for short string length in unstructured indexes,
                 Marqo can filter on short strings but can not filter on long strings
             all_fields: list of all the fields in the structured index
@@ -132,12 +136,15 @@ def create_index(
             config=self.config, index_name=index_name,
             type=type, settings_dict=settings_dict,
             treat_urls_and_pointers_as_images=treat_urls_and_pointers_as_images,
+            treat_urls_and_pointers_as_media=treat_urls_and_pointers_as_media,
             filter_string_max_length=filter_string_max_length,
             all_fields=all_fields, tensor_fields=tensor_fields,
             model=model, model_properties=model_properties,
             normalize_embeddings=normalize_embeddings,
             text_preprocessing=text_preprocessing,
             image_preprocessing=image_preprocessing,
+            audio_preprocessing=audio_preprocessing,
+            video_preprocessing=video_preprocessing,
             vector_numeric_type=vector_numeric_type,
             ann_parameters=ann_parameters,
             wait_for_readiness=wait_for_readiness,

diff --git a/src/marqo/index.py b/src/marqo/index.py
@@ -80,6 +80,7 @@ def create(config: Config,
                type: Optional[marqo_index.IndexType] = None,
                settings_dict: Optional[Dict[str, Any]] = None,
                treat_urls_and_pointers_as_images: Optional[bool] = None,
+               treat_urls_and_pointers_as_media: Optional[bool] = None,
                filter_string_max_length: Optional[int] = None,
                all_fields: Optional[List[marqo_index.FieldRequest]] = None,
                tensor_fields: Optional[List[str]] = None,
@@ -88,6 +89,8 @@ def create(config: Config,
                normalize_embeddings: Optional[bool] = None,
                text_preprocessing: Optional[marqo_index.TextPreProcessing] = None,
                image_preprocessing: Optional[marqo_index.ImagePreProcessing] = None,
+               audio_preprocessing: Optional[marqo_index.AudioPreProcessing] = None,
+               video_preprocessing: Optional[marqo_index.VideoPreProcessing] = None,
                vector_numeric_type: Optional[marqo_index.VectorNumericType] = None,
                ann_parameters: Optional[marqo_index.AnnParameters] = None,
                inference_type: Optional[str] = None,
@@ -114,6 +117,7 @@ def create(config: Config,
                 parameters, and is passed directly as the index's
                 index_settings
             treat_urls_and_pointers_as_images: whether to treat urls and pointers as images in unstructured indexes
+            treat_urls_and_pointers_as_media: whether to treat urls and pointers as media (video/audio) in unstructured indexes
             filter_string_max_length: threshold for short string length in unstructured indexes,
                 Marqo can filter on short strings but can not filter on long strings
             all_fields: list of fields in the structured index
@@ -148,13 +152,16 @@ def create(config: Config,
                 allFields=all_fields,
                 settingsDict=settings_dict,
                 treatUrlsAndPointersAsImages=treat_urls_and_pointers_as_images,
+                treatUrlsAndPointersAsMedia=treat_urls_and_pointers_as_media,
                 filterStringMaxLength=filter_string_max_length,
                 tensorFields=tensor_fields,
                 model=model,
                 modelProperties=model_properties,
                 normalizeEmbeddings=normalize_embeddings,
                 textPreprocessing=text_preprocessing,
                 imagePreprocessing=image_preprocessing,
+                audioPreprocessing=audio_preprocessing,
+                videoPreprocessing=video_preprocessing,
                 vectorNumericType=vector_numeric_type,
                 annParameters=ann_parameters,
                 textChunkPrefix=text_chunk_prefix,
@@ -170,13 +177,16 @@ def create(config: Config,
                 allFields=all_fields,
                 settingsDict=settings_dict,
                 treatUrlsAndPointersAsImages=treat_urls_and_pointers_as_images,
+                treatUrlsAndPointersAsMedia=treat_urls_and_pointers_as_media,
                 filterStringMaxLength=filter_string_max_length,
                 tensorFields=tensor_fields,
                 model=model,
                 modelProperties=model_properties,
                 normalizeEmbeddings=normalize_embeddings,
                 textPreprocessing=text_preprocessing,
                 imagePreprocessing=image_preprocessing,
+                audioPreprocessing=audio_preprocessing,
+                videoPreprocessing=video_preprocessing,
                 vectorNumericType=vector_numeric_type,
                 annParameters=ann_parameters,
                 numberOfInferences=number_of_inferences,

diff --git a/src/marqo/models/create_index_settings.py b/src/marqo/models/create_index_settings.py
@@ -14,7 +14,9 @@ class IndexSettings(MarqoBaseModel):
             Can not be specified with other parameters.
         tensorFields: A list of all tensor fields in the index.
         treatUrlsAndPointersAsImages: Whether to treat urls and pointers as images.
-            This unstructured index only parameter.
+            This is an unstructured index only parameter.
+        treatUrlsAndPointersAsMedia: Whether to treat urls and pointers as media (video/audio).
+            This is an unstructured index only parameter.
         filterStringMaxLength: The max length of the filter string in unstructured index
         model: The name of the model to use for the index.
         modelProperties: A dictionary of model properties.
@@ -34,12 +36,15 @@ class IndexSettings(MarqoBaseModel):
     settingsDict: Optional[Dict] = None
     tensorFields: Optional[List[str]] = None
     treatUrlsAndPointersAsImages: Optional[bool] = None
+    treatUrlsAndPointersAsMedia: Optional[bool] = None
     filterStringMaxLength: Optional[int] = None
     model: Optional[str] = None
     modelProperties: Optional[Dict[str, Any]] = None
     normalizeEmbeddings: Optional[bool] = None
     textPreprocessing: Optional[marqo_index.TextPreProcessing] = None
     imagePreprocessing: Optional[marqo_index.ImagePreProcessing] = None
+    audioPreprocessing: Optional[marqo_index.AudioPreProcessing] = None
+    videoPreprocessing: Optional[marqo_index.VideoPreProcessing] = None
     vectorNumericType: Optional[marqo_index.VectorNumericType] = None
     annParameters: Optional[marqo_index.AnnParameters] = None
     textQueryPrefix: Optional[str] = None

diff --git a/src/marqo/models/marqo_index.py b/src/marqo/models/marqo_index.py
@@ -23,6 +23,8 @@ class FieldType(str, Enum):
     ArrayFloat = 'array<float>'
     ArrayDouble = 'array<double>'
     ImagePointer = 'image_pointer'
+    VideoPointer = 'video_pointer'
+    AudioPointer = 'audio_pointer'
     MultimodalCombination = 'multimodal_combination'
     CustomVector = "custom_vector"
     MapInt = 'map<text, int>'
@@ -77,6 +79,14 @@ class TextPreProcessing(StrictBaseModel):
 class ImagePreProcessing(StrictBaseModel):
     patchMethod: Optional[PatchMethod] = Field(None, alias="patch_method")
 
+class VideoPreProcessing(StrictBaseModel):
+    splitLength: Optional[int] = Field(None, alias="split_length")
+    splitOverlap: Optional[int] = Field(None, alias="split_overlap")
+
+class AudioPreProcessing(StrictBaseModel):
+    splitLength: Optional[int] = Field(None, alias="split_length")
+    splitOverlap: Optional[int] = Field(None, alias="split_overlap")
+
 
 class Model(StrictBaseModel):
     name: Optional[str] = None

diff --git a/src/marqo/version.py b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__minimum_supported_marqo_version__ = "2.10.0"
+__minimum_supported_marqo_version__ = "2.12.0"
 
 # NOTE: This isn't used anywhere
 def supported_marqo_version() -> str:

diff --git a/tests/cloud_test_logic/cloud_test_index.py b/tests/cloud_test_logic/cloud_test_index.py
@@ -8,11 +8,13 @@ class CloudTestIndex(str, Enum):
     Please try to keep names short to avoid hitting name-length limits
 
     We create 3 unstructured indexes and 3 structured indexes to test:
+
     1) unstructured_text: Text-only index using hf/e5-base-v2, 2 shards, 1 replica, CPU, balanced storage, for hybrid duplicates testing.
     2) unstructured_image: Image-compatible index using open_clip/ViT-B-32/laion2b_s34b_b79k, 1 shard, no replicas, CPU, basic storage.
     3) unstructured_no_model: 512-dimension custom vectors, 1 shard, no replicas, CPU, basic storage.
     4) structured_text: Structured text index with hf/e5-base-v2, lexical search, 2 shards, 1 replica, CPU, balanced storage.
     5) structured_image: Structured image-text index with open_clip/ViT-B-32, 2 shards, 1 replica, CPU, balanced storage, with image preprocessing.
+    6) structured_languagebind_model: a structured index using the LanguageBind model for multi-modal support.
     For more information on the settings of each index, please refer to index_name_to_settings_mappings.
 
     FOR CLOUD REPLICAS AND SHARDS:
@@ -21,6 +23,7 @@ class CloudTestIndex(str, Enum):
 
     We design these indexes to maximize the coverage of different settings and features. For each test method,
     we will have to manually specify which index to use.
+
     """
 
     unstructured_text = "pymarqo_unstr_txt"
@@ -32,6 +35,7 @@ class CloudTestIndex(str, Enum):
     structured_image_custom = "pymarqo_str_img_custom"
     structured_text = "pymarqo_str_txt"
     structured_image = "pymarqo_str_img"
+    structured_languagebind_model = "pymarqo_str_langbind_model"
 
 
 index_name_to_settings_mappings = {
@@ -80,11 +84,10 @@ class CloudTestIndex(str, Enum):
             {"name": "int_field_1", "type": "int", "features": ["score_modifier"]},
             {"name": "int_filter_field_1", "type": "int", "features": ["filter", "score_modifier"]}],
         "tensorFields": ["text_field_1", "text_field_2", "text_field_3"],
-
         "inferenceType": "marqo.CPU.small",
         "storageClass": "marqo.balanced",
         "numberOfShards": 2,
-        "numberOfReplicas": 1,  # For hybrid duplicates test
+        "numberOfReplicas": 1, # For hybrid duplicates test
     },
     CloudTestIndex.structured_image: {
         "type": "structured",
@@ -110,5 +113,27 @@ class CloudTestIndex(str, Enum):
         "imagePreprocessing": {
             "patchMethod": "simple",
         }
-    }
+    },
+    CloudTestIndex.structured_languagebind_model: {
+        "type": "structured",
+        "model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image",
+        "inferenceType": "marqo.GPU",
+        "storageClass": "marqo.balanced",
+        "allFields": [
+            {"name": "text_field_1", "type": "text"},
+            {"name": "text_field_2", "type": "text"},
+            {"name": "text_field_3", "type": "text"},
+            {"name": "video_field_1", "type": "video_pointer"},
+            {"name": "video_field_2", "type": "video_pointer"},
+            {"name": "video_field_3", "type": "video_pointer"},
+            {"name": "audio_field_1", "type": "audio_pointer"},
+            {"name": "audio_field_2", "type": "audio_pointer"},
+            {"name": "image_field_1", "type": "image_pointer"},
+            {"name": "image_field_2", "type": "image_pointer"},
+            {"name": "multimodal_field", "type": "multimodal_combination"},
+        ],
+        "tensorFields": ["multimodal_field", "text_field_3", "video_field_3", "audio_field_2", "image_field_2"],
+        "normalizeEmbeddings": True,
+    },
+
 }
diff --git a/tests/marqo_test.py b/tests/marqo_test.py
@@ -209,10 +209,15 @@ def setUpClass(cls) -> None:
         cls.unstructured_no_model_index_name = "unstructured_no_model_index"
         cls.structured_image_index_name_simple_preprocessing_method = \
             "structured_image_index_simple_preprocessing_method"
+        cls.structured_languagebind_index_name = "structured_languagebind_index"
+
         # TODO: include structured when boolean_field bug for structured is fixed
         cls.test_cases = [
             (CloudTestIndex.unstructured_image, cls.unstructured_index_name),
         ]
+        cls.test_cases_multimodal = [
+            (CloudTestIndex.structured_languagebind_model, cls.structured_languagebind_index_name)
+        ]
 
         # class property to indicate if test is being run on multi
         cls.IS_MULTI_INSTANCE = (True if os.environ.get("IS_MULTI_INSTANCE", False) in ["True", "TRUE", "true", True] else False)
@@ -262,6 +267,36 @@ def setUpClass(cls) -> None:
                             "type": "no_model",
                             "dimensions": 512
                         }
+                    },
+                    {
+                        "indexName": cls.structured_languagebind_index_name,
+                        "type": "structured",
+                        "model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image",
+                        "allFields": [
+                            {"name": "text_field_1", "type": "text"},
+                            {"name": "text_field_2", "type": "text"},
+                            {"name": "text_field_3", "type": "text"},
+                            {"name": "video_field_1", "type": "video_pointer"},
+                            {"name": "video_field_2", "type": "video_pointer"},
+                            {"name": "video_field_3", "type": "video_pointer"},
+                            {"name": "audio_field_1", "type": "audio_pointer"},
+                            {"name": "audio_field_2", "type": "audio_pointer"},
+                            {"name": "image_field_1", "type": "image_pointer"},
+                            {"name": "image_field_2", "type": "image_pointer"},
+                            {
+                                "name": "multimodal_field", 
+                                "type": "multimodal_combination",
+                                "dependentFields": {
+                                    "text_field_1": 0.1,
+                                    "text_field_2": 0.1,
+                                    "image_field_1": 0.5,
+                                    "video_field_1": 0.1,
+                                    "video_field_2": 0.1,
+                                    "audio_field_1": 0.1
+                                }
+                            },
+                        ],
+                        "tensorFields": ["multimodal_field", "text_field_3", "video_field_3", "audio_field_2", "image_field_2"]
                     }
                 ])
             except Exception as e: