Removed references to processes, server_batch_size. Ensured add_documents calls have create_index() calls before them (#529)
pandu-k authored Jul 6, 2023
1 parent 56ff881 commit 7567386
Showing 13 changed files with 22 additions and 27 deletions.
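
For reference, the pattern the updated examples converge on: call `create_index()` explicitly before `add_documents()`, and batch on the client with `client_batch_size` rather than the removed `processes`/`server_batch_size` arguments. A minimal sketch, assuming a local Marqo instance at http://localhost:8882 as used throughout the examples; the index name and documents below are illustrative:

```python
import marqo

mq = marqo.Client(url="http://localhost:8882")  # local Marqo container, as in the examples

index_name = "example-index"  # hypothetical index name
documents = [                 # hypothetical documents
    {"_id": "1", "Title": "First doc", "Description": "Some text to index"},
    {"_id": "2", "Title": "Second doc", "Description": "More text to index"},
]

# Create the index before adding documents (previously left implicit in some examples).
mq.create_index(index_name)

# Client-side batching only; server_batch_size and processes are no longer passed.
responses = mq.index(index_name).add_documents(documents, device="cpu", client_batch_size=2)
```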
2 changes: 1 addition & 1 deletion examples/GPT-examples/ironman.py
@@ -100,7 +100,7 @@
 }
 }

-# create the index - if no settings are present then sensible deffaults are used
+# create the index - if no settings are present then sensible defaults are used
 mq.create_index(index_name, settings_dict=index_settings)
 res = mq.index(index_name).add_documents(documents)
3 changes: 3 additions & 0 deletions examples/GPT3NewsSummary/README.md
@@ -79,6 +79,9 @@ DOC_INDEX_NAME = 'news-index'
 print('Establishing connection to marqo client.')
 mq = marqo.Client(url='http://localhost:8882')
+print('creating a Marqo index')
+mq.create_index(DOC_INDEX_NAME)
 print('Indexing documents')
 mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
 ```
3 changes: 3 additions & 0 deletions examples/GPT3NewsSummary/main.py
@@ -34,6 +34,9 @@
 except KeyboardInterrupt:
 raise
 except:
+print('Creating index')
+mq.create_index(DOC_INDEX_NAME)
+
 print('Indexing documents')
 mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
 print('Done')
2 changes: 1 addition & 1 deletion examples/ImageSearchGuide/ImageSearchGuide.md
@@ -139,7 +139,7 @@ output:

 Add the documents into the previously created index using function `add_documents()`
 ```python
-mq.index(index_name).add_documents(documents, device="cpu", processes=1, client_batch_size= 1)
+mq.index(index_name).add_documents(documents, device="cpu", client_batch_size= 1)
 ```
 ```python
 outputs:
2 changes: 1 addition & 1 deletion examples/ImageSearchGuide/imagesearchguide.ipynb
@@ -245,7 +245,7 @@
 }
 ],
 "source": [
-"mq.index(index_name).add_documents(documents, device=\"cpu\", processes=1, client_batch_size= 1)"
+"mq.index(index_name).add_documents(documents, device=\"cpu\", client_batch_size= 1)"
 ]
 },
 {
5 changes: 2 additions & 3 deletions examples/ImageSearchLocalization/article.md
@@ -150,9 +150,8 @@ for patch_method in patch_methods:

 response = client.create_index(index_name, settings_dict=settings)

-# index the documents on the GPU using multiple processes
-response = client.index(index_name).add_documents(documents, device='cuda',
-server_batch_size=50, processes=2)
+# index the documents on the GPU
+response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=50)
 ```

 If no GPU is available, set device='cpu'.
6 changes: 2 additions & 4 deletions examples/ImageSearchLocalization/index_all_data.py
@@ -44,8 +44,7 @@
 index_name_prefix = "visual-search"
 patch_methods = ["dino/v1", None, "yolox"] #["dino/v1", "dino/v2", "frcnn", None, "yolox"]
 model_name = "ViT-B/32"
-n_processes = 3
-batch_size = 50
+batch_size = 24

 # set this to false if you do not want to delete the previous index of the same name
 delete_index = True
@@ -81,5 +80,4 @@
 response = client.create_index(index_name, settings_dict=settings)


-response = client.index(index_name).add_documents(documents, device='cuda',
-server_batch_size=batch_size, processes=n_processes)
+response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=batch_size)
3 changes: 0 additions & 3 deletions examples/SimpleWiki/simple_wiki_demo.py
@@ -89,9 +89,6 @@ def split_big_docs(data, field='content', char_len=5e4):

 device = 'cpu'

-# here we use parallel indexing to speed up the task
-# Note: to use multiprocessing you will want at least 8GB of RAM and the maximum number
-# of processes that can be supported will be system dependent.
 responses = client.index(index_name).add_documents(data, device=device, client_batch_size=20)

 # optionally take a look at the responses
3 changes: 1 addition & 2 deletions examples/SpeechProcessing/SpeechSearch/indexer.py
@@ -24,8 +24,7 @@ def index_transciptions(
 annotated_transcriptions,
 non_tensor_fields=non_tensor_fields,
 device=device,
-client_batch_size=batch_size,
-server_batch_size=batch_size,
+client_batch_size=batch_size
 )

 return response
3 changes: 1 addition & 2 deletions examples/SpeechProcessing/article/article.md
@@ -316,8 +316,7 @@ def index_transciptions(
 annotated_transcriptions,
 non_tensor_fields=non_tensor_fields,
 device=device,
-client_batch_size=batch_size,
-server_batch_size=batch_size,
+client_batch_size=batch_size
 )

 return response
6 changes: 2 additions & 4 deletions examples/StableDiffusion/hot-dog-100k.md
@@ -66,8 +66,7 @@ settings = {
 "treat_urls_and_pointers_as_images": True,
 }
 client.create_index("hot-dogs-100k", **settings)
-responses = client.index("hot-dogs-100k").add_documents(documents,
-device="cuda", processes=4, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device="cuda", client_batch_size=50)

 ```
 Check we have our images in the index:
@@ -163,8 +162,7 @@ We have now calculated scores for the different categories described previously.

 ```python
 documents_image_docker = [doc.pop('image_docker') for doc in documents]
-responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
-processes=3, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)
 ```

 ## Animating the hot-dog 100k dataset
8 changes: 3 additions & 5 deletions examples/StableDiffusion/hot-dog-100k.py
@@ -58,9 +58,8 @@
 }
 client.create_index(index_name, **settings)

-# here we use parallel indexing to speed up the task - a gpu is recomended (device='cuda')
-responses = client.index(index_name).add_documents(documents, device='cpu'
-, processes=4, batch_size=50)
+# Here we index. A gpu is recommended (device='cuda')
+responses = client.index(index_name).add_documents(documents, device='cpu', client_batch_size=50)


 #####################################################
@@ -101,8 +100,7 @@
 doc[lab.replace(' ','_')] = [r['_score'] for r in responses['hits'] if r['label'] == lab][0]

 documents_image_docker = [doc.pop('image_docker') for doc in documents]
-responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
-processes=3, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)

 #####################################################
 ### Step 4. Remove the black images
3 changes: 2 additions & 1 deletion examples/podcast-search/podcast_search_demo.py
@@ -46,7 +46,8 @@ def load_data(file: str, number_data: int) -> dict:
 ####################################################
 index_name = "marqo-podcast-search-demo"
 mq = marqo.Client(url='http://localhost:8882') # Connection to Marqo Docker Container
-mq.index(index_name).add_documents(podcast_data) # If the index doesn't exist, Marqo will create it
+mq.create_index(index_name)
+mq.index(index_name).add_documents(podcast_data)
 stats = mq.index(index_name).get_stats() # get the stats for the index
 print(f"{stats['numberOfDocuments']} documents added to index: {index_name}")
