From 75673862514ed3ddd7ed3420d548918e22b85405 Mon Sep 17 00:00:00 2001
From: pandu-k <107458762+pandu-k@users.noreply.github.com>
Date: Thu, 6 Jul 2023 20:43:37 +1000
Subject: [PATCH] Removed references to processes, server_batch_size. Ensured
 add_documents calls have create_index() calls before them (#529)

---
 examples/GPT-examples/ironman.py                   | 2 +-
 examples/GPT3NewsSummary/README.md                 | 3 +++
 examples/GPT3NewsSummary/main.py                   | 3 +++
 examples/ImageSearchGuide/ImageSearchGuide.md      | 2 +-
 examples/ImageSearchGuide/imagesearchguide.ipynb   | 2 +-
 examples/ImageSearchLocalization/article.md        | 5 ++---
 examples/ImageSearchLocalization/index_all_data.py | 6 ++----
 examples/SimpleWiki/simple_wiki_demo.py            | 3 ---
 examples/SpeechProcessing/SpeechSearch/indexer.py  | 3 +--
 examples/SpeechProcessing/article/article.md       | 3 +--
 examples/StableDiffusion/hot-dog-100k.md           | 6 ++----
 examples/StableDiffusion/hot-dog-100k.py           | 8 +++-----
 examples/podcast-search/podcast_search_demo.py     | 3 ++-
 13 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/examples/GPT-examples/ironman.py b/examples/GPT-examples/ironman.py
index c07509f32..3cc5ed5d6 100644
--- a/examples/GPT-examples/ironman.py
+++ b/examples/GPT-examples/ironman.py
@@ -100,7 +100,7 @@
         }
     }
 
-    # create the index - if no settings are present then sensible deffaults are used
+    # create the index - if no settings are present then sensible defaults are used
     mq.create_index(index_name, settings_dict=index_settings)
 
     res = mq.index(index_name).add_documents(documents)
diff --git a/examples/GPT3NewsSummary/README.md b/examples/GPT3NewsSummary/README.md
index d3e1dad19..678c4819d 100644
--- a/examples/GPT3NewsSummary/README.md
+++ b/examples/GPT3NewsSummary/README.md
@@ -79,6 +79,9 @@ DOC_INDEX_NAME = ''news-index'
 print('Establishing connection to marqo client.')
 mq = marqo.Client(url='http://localhost:8882')
 
+print('creating a Marqo index')
+mq.create_index(DOC_INDEX_NAME)
+
 print('Indexing documents')
 mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
 ```
diff --git a/examples/GPT3NewsSummary/main.py b/examples/GPT3NewsSummary/main.py
index 9171a955e..acdf486ca 100644
--- a/examples/GPT3NewsSummary/main.py
+++ b/examples/GPT3NewsSummary/main.py
@@ -34,6 +34,9 @@
 except KeyboardInterrupt:
     raise
 except:
+    print('Creating index')
+    mq.create_index(DOC_INDEX_NAME)
+
     print('Indexing documents')
     mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
     print('Done')
diff --git a/examples/ImageSearchGuide/ImageSearchGuide.md b/examples/ImageSearchGuide/ImageSearchGuide.md
index f3bcd38f3..c5cb84406 100644
--- a/examples/ImageSearchGuide/ImageSearchGuide.md
+++ b/examples/ImageSearchGuide/ImageSearchGuide.md
@@ -139,7 +139,7 @@
 output:
 Add the documents into the previously created index using function `add_documents()`
 ```python
-mq.index(index_name).add_documents(documents, device="cpu", processes=1, client_batch_size= 1)
+mq.index(index_name).add_documents(documents, device="cpu", client_batch_size= 1)
 ```
 ```python
 outputs:
diff --git a/examples/ImageSearchGuide/imagesearchguide.ipynb b/examples/ImageSearchGuide/imagesearchguide.ipynb
index bbe7488c4..137c201d5 100644
--- a/examples/ImageSearchGuide/imagesearchguide.ipynb
+++ b/examples/ImageSearchGuide/imagesearchguide.ipynb
@@ -245,7 +245,7 @@
     }
    ],
    "source": [
-    "mq.index(index_name).add_documents(documents, device=\"cpu\", processes=1, client_batch_size= 1)"
+    "mq.index(index_name).add_documents(documents, device=\"cpu\", client_batch_size= 1)"
    ]
   },
   {
diff --git a/examples/ImageSearchLocalization/article.md b/examples/ImageSearchLocalization/article.md
index 22bb4c433..7745180ad 100644
--- a/examples/ImageSearchLocalization/article.md
+++ b/examples/ImageSearchLocalization/article.md
@@ -150,9 +150,8 @@ for patch_method in patch_methods:
 
     response = client.create_index(index_name, settings_dict=settings)
 
-    # index the documents on the GPU using multiple processes
-    response = client.index(index_name).add_documents(documents, device='cuda',
-                                                      server_batch_size=50, processes=2)
+    # index the documents on the GPU
+    response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=50)
 
 ```
 If no GPU is available, set device='cpu'. 
diff --git a/examples/ImageSearchLocalization/index_all_data.py b/examples/ImageSearchLocalization/index_all_data.py
index c06c454e8..10a2c7607 100644
--- a/examples/ImageSearchLocalization/index_all_data.py
+++ b/examples/ImageSearchLocalization/index_all_data.py
@@ -44,8 +44,7 @@
 index_name_prefix = "visual-search"
 patch_methods = ["dino/v1", None, "yolox"] #["dino/v1", "dino/v2", "frcnn", None, "yolox"]
 model_name = "ViT-B/32"
-n_processes = 3
-batch_size = 50
+batch_size = 24
 
 # set this to false if you do not want to delete the previous index of the same name
 delete_index = True
@@ -81,5 +80,4 @@
 
     response = client.create_index(index_name, settings_dict=settings)
 
-    response = client.index(index_name).add_documents(documents, device='cuda',
-                                                      server_batch_size=batch_size, processes=n_processes)
\ No newline at end of file
+    response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=batch_size)
\ No newline at end of file
diff --git a/examples/SimpleWiki/simple_wiki_demo.py b/examples/SimpleWiki/simple_wiki_demo.py
index 9f46f82e2..8b41259c3 100644
--- a/examples/SimpleWiki/simple_wiki_demo.py
+++ b/examples/SimpleWiki/simple_wiki_demo.py
@@ -89,9 +89,6 @@ def split_big_docs(data, field='content', char_len=5e4):
 
 device = 'cpu'
 
-# here we use parallel indexing to speed up the task
-# Note: to use multiprocessing you will want at least 8GB of RAM and the maximum number
-# of processes that can be supported will be system dependent.
 responses = client.index(index_name).add_documents(data, device=device, client_batch_size=20)
 
 # optionally take a look at the responses
diff --git a/examples/SpeechProcessing/SpeechSearch/indexer.py b/examples/SpeechProcessing/SpeechSearch/indexer.py
index 4357bb63a..ddf88e4d4 100644
--- a/examples/SpeechProcessing/SpeechSearch/indexer.py
+++ b/examples/SpeechProcessing/SpeechSearch/indexer.py
@@ -24,8 +24,7 @@ def index_transciptions(
         annotated_transcriptions,
         non_tensor_fields=non_tensor_fields,
         device=device,
-        client_batch_size=batch_size,
-        server_batch_size=batch_size,
+        client_batch_size=batch_size
     )
     return response
 
diff --git a/examples/SpeechProcessing/article/article.md b/examples/SpeechProcessing/article/article.md
index 3290d3e76..28862eab1 100644
--- a/examples/SpeechProcessing/article/article.md
+++ b/examples/SpeechProcessing/article/article.md
@@ -316,8 +316,7 @@ def index_transciptions(
         annotated_transcriptions,
         non_tensor_fields=non_tensor_fields,
         device=device,
-        client_batch_size=batch_size,
-        server_batch_size=batch_size,
+        client_batch_size=batch_size
     )
     return response
 
diff --git a/examples/StableDiffusion/hot-dog-100k.md b/examples/StableDiffusion/hot-dog-100k.md
index e3409f0eb..c2fe050da 100644
--- a/examples/StableDiffusion/hot-dog-100k.md
+++ b/examples/StableDiffusion/hot-dog-100k.md
@@ -66,8 +66,7 @@
 settings = {
     "treat_urls_and_pointers_as_images": True,
 }
 client.create_index("hot-dogs-100k", **settings)
-responses = client.index("hot-dogs-100k").add_documents(documents,
-                                                         device="cuda", processes=4, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device="cuda", client_batch_size=50)
 ```
 Check we have our images in the index:
@@ -163,8 +162,7 @@
 We have now calculated scores for the different categories described previously.
 ```python
 documents_image_docker = [doc.pop('image_docker') for doc in documents]
-responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
-                                                         processes=3, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)
 ```
 
 ## Animating the hot-dog 100k dataset
diff --git a/examples/StableDiffusion/hot-dog-100k.py b/examples/StableDiffusion/hot-dog-100k.py
index 18fcd05b7..b97d6a588 100644
--- a/examples/StableDiffusion/hot-dog-100k.py
+++ b/examples/StableDiffusion/hot-dog-100k.py
@@ -58,9 +58,8 @@
 }
 
 client.create_index(index_name, **settings)
 
-# here we use parallel indexing to speed up the task - a gpu is recomended (device='cuda')
-responses = client.index(index_name).add_documents(documents, device='cpu'
-                                                   , processes=4, batch_size=50)
+# Here we index. A gpu is recommended (device='cuda')
+responses = client.index(index_name).add_documents(documents, device='cpu', client_batch_size=50)
 
 #####################################################
@@ -101,8 +100,7 @@
     doc[lab.replace(' ','_')] = [r['_score'] for r in responses['hits'] if r['label'] == lab][0]
 
 documents_image_docker = [doc.pop('image_docker') for doc in documents]
-responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
-                                                        processes=3, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)
 
 #####################################################
 ### Step 4. Remove the black images
diff --git a/examples/podcast-search/podcast_search_demo.py b/examples/podcast-search/podcast_search_demo.py
index c7293f7d0..391633639 100644
--- a/examples/podcast-search/podcast_search_demo.py
+++ b/examples/podcast-search/podcast_search_demo.py
@@ -46,7 +46,8 @@ def load_data(file: str, number_data: int) -> dict:
 ####################################################
 index_name = "marqo-podcast-search-demo"
 mq = marqo.Client(url='http://localhost:8882') # Connection to Marqo Docker Container
-mq.index(index_name).add_documents(podcast_data) # If the index doesn't exist, Marqo will create it
+mq.create_index(index_name)
+mq.index(index_name).add_documents(podcast_data)
 stats = mq.index(index_name).get_stats() # get the stats for the index
 
 print(f"{stats['numberOfDocuments']} documents added to index: {index_name}")
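
For reference, a minimal sketch of the indexing pattern this patch standardises on across the examples: create the index explicitly before calling `add_documents`, and batch on the client with `client_batch_size` rather than the removed `server_batch_size`/`processes` arguments. The index name and documents below are hypothetical placeholders; the client calls mirror the ones used in the patch and assume a local Marqo instance at http://localhost:8882.

```python
import marqo

# Assumes a Marqo container is running locally, as in the examples above.
mq = marqo.Client(url="http://localhost:8882")

index_name = "demo-index"  # hypothetical index name, not from the patch

# Create the index up front - if no settings are given, sensible defaults are used.
mq.create_index(index_name)

# Placeholder documents purely for illustration.
documents = [
    {"title": "First document", "content": "Some text to index."},
    {"title": "Second document", "content": "More text to index."},
]

# Index with client-side batching; set device="cuda" if a GPU is available.
responses = mq.index(index_name).add_documents(
    documents,
    device="cpu",
    client_batch_size=2,
)
print(responses)
```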