Removed references to processes, server_batch_size. Ensured add_documents calls have create_index() calls before them (#529)
pandu-k authored Jul 6, 2023
1 parent 56ff881 commit 7567386
Showing 13 changed files with 22 additions and 27 deletions.
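
For reference, the pattern the updated examples converge on: call `create_index()` explicitly before `add_documents()`, and batch on the client with `client_batch_size` rather than the removed `processes`/`server_batch_size` arguments. A minimal sketch, assuming a local Marqo instance at http://localhost:8882 as used throughout the examples; the index name and documents below are illustrative:

```python
import marqo

mq = marqo.Client(url="http://localhost:8882")  # local Marqo container, as in the examples

index_name = "example-index"  # hypothetical index name
documents = [                 # hypothetical documents
    {"_id": "1", "Title": "First doc", "Description": "Some text to index"},
    {"_id": "2", "Title": "Second doc", "Description": "More text to index"},
]

# Create the index before adding documents (previously left implicit in some examples).
mq.create_index(index_name)

# Client-side batching only; server_batch_size and processes are no longer passed.
responses = mq.index(index_name).add_documents(documents, device="cpu", client_batch_size=2)
```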
2 changes: 1 addition & 1 deletion examples/GPT-examples/ironman.py
@@ -100,7 +100,7 @@
 }
 }

-# create the index - if no settings are present then sensible deffaults are used
+# create the index - if no settings are present then sensible defaults are used
 mq.create_index(index_name, settings_dict=index_settings)
 res = mq.index(index_name).add_documents(documents)
3 changes: 3 additions & 0 deletions examples/GPT3NewsSummary/README.md
@@ -79,6 +79,9 @@ DOC_INDEX_NAME = 'news-index'
 print('Establishing connection to marqo client.')
 mq = marqo.Client(url='http://localhost:8882')
+print('creating a Marqo index')
+mq.create_index(DOC_INDEX_NAME)
 print('Indexing documents')
 mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
 ```
3 changes: 3 additions & 0 deletions examples/GPT3NewsSummary/main.py
@@ -34,6 +34,9 @@
 except KeyboardInterrupt:
 raise
 except:
+print('Creating index')
+mq.create_index(DOC_INDEX_NAME)
+
 print('Indexing documents')
 mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
 print('Done')
2 changes: 1 addition & 1 deletion examples/ImageSearchGuide/ImageSearchGuide.md
@@ -139,7 +139,7 @@ output:

 Add the documents into the previously created index using function `add_documents()`
 ```python
-mq.index(index_name).add_documents(documents, device="cpu", processes=1, client_batch_size= 1)
+mq.index(index_name).add_documents(documents, device="cpu", client_batch_size= 1)
 ```
 ```python
 outputs:
2 changes: 1 addition & 1 deletion examples/ImageSearchGuide/imagesearchguide.ipynb
@@ -245,7 +245,7 @@
 }
 ],
 "source": [
-"mq.index(index_name).add_documents(documents, device=\"cpu\", processes=1, client_batch_size= 1)"
+"mq.index(index_name).add_documents(documents, device=\"cpu\", client_batch_size= 1)"
 ]
 },
 {
5 changes: 2 additions & 3 deletions examples/ImageSearchLocalization/article.md
@@ -150,9 +150,8 @@ for patch_method in patch_methods:

 response = client.create_index(index_name, settings_dict=settings)

-# index the documents on the GPU using multiple processes
-response = client.index(index_name).add_documents(documents, device='cuda',
-server_batch_size=50, processes=2)
+# index the documents on the GPU
+response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=50)
 ```

 If no GPU is available, set device='cpu'.
6 changes: 2 additions & 4 deletions examples/ImageSearchLocalization/index_all_data.py
@@ -44,8 +44,7 @@
 index_name_prefix = "visual-search"
 patch_methods = ["dino/v1", None, "yolox"] #["dino/v1", "dino/v2", "frcnn", None, "yolox"]
 model_name = "ViT-B/32"
-n_processes = 3
-batch_size = 50
+batch_size = 24

 # set this to false if you do not want to delete the previous index of the same name
 delete_index = True
@@ -81,5 +80,4 @@
 response = client.create_index(index_name, settings_dict=settings)


-response = client.index(index_name).add_documents(documents, device='cuda',
-server_batch_size=batch_size, processes=n_processes)
+response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=batch_size)
3 changes: 0 additions & 3 deletions examples/SimpleWiki/simple_wiki_demo.py
@@ -89,9 +89,6 @@ def split_big_docs(data, field='content', char_len=5e4):

 device = 'cpu'

-# here we use parallel indexing to speed up the task
-# Note: to use multiprocessing you will want at least 8GB of RAM and the maximum number
-# of processes that can be supported will be system dependent.
 responses = client.index(index_name).add_documents(data, device=device, client_batch_size=20)

 # optionally take a look at the responses
3 changes: 1 addition & 2 deletions examples/SpeechProcessing/SpeechSearch/indexer.py
@@ -24,8 +24,7 @@ def index_transciptions(
 annotated_transcriptions,
 non_tensor_fields=non_tensor_fields,
 device=device,
-client_batch_size=batch_size,
-server_batch_size=batch_size,
+client_batch_size=batch_size
 )

 return response
3 changes: 1 addition & 2 deletions examples/SpeechProcessing/article/article.md
@@ -316,8 +316,7 @@ def index_transciptions(
 annotated_transcriptions,
 non_tensor_fields=non_tensor_fields,
 device=device,
-client_batch_size=batch_size,
-server_batch_size=batch_size,
+client_batch_size=batch_size
 )

 return response
6 changes: 2 additions & 4 deletions examples/StableDiffusion/hot-dog-100k.md
@@ -66,8 +66,7 @@ settings = {
 "treat_urls_and_pointers_as_images": True,
 }
 client.create_index("hot-dogs-100k", **settings)
-responses = client.index("hot-dogs-100k").add_documents(documents,
-device="cuda", processes=4, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device="cuda", client_batch_size=50)

 ```
 Check we have our images in the index:
@@ -163,8 +162,7 @@ We have now calculated scores for the different categories described previously.

 ```python
 documents_image_docker = [doc.pop('image_docker') for doc in documents]
-responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
-processes=3, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)
 ```

 ## Animating the hot-dog 100k dataset
8 changes: 3 additions & 5 deletions examples/StableDiffusion/hot-dog-100k.py
@@ -58,9 +58,8 @@
 }
 client.create_index(index_name, **settings)

-# here we use parallel indexing to speed up the task - a gpu is recomended (device='cuda')
-responses = client.index(index_name).add_documents(documents, device='cpu'
-, processes=4, batch_size=50)
+# Here we index. A gpu is recommended (device='cuda')
+responses = client.index(index_name).add_documents(documents, device='cpu', client_batch_size=50)


 #####################################################
@@ -101,8 +100,7 @@
 doc[lab.replace(' ','_')] = [r['_score'] for r in responses['hits'] if r['label'] == lab][0]

 documents_image_docker = [doc.pop('image_docker') for doc in documents]
-responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
-processes=3, batch_size=50)
+responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)

 #####################################################
 ### Step 4. Remove the black images
3 changes: 2 additions & 1 deletion examples/podcast-search/podcast_search_demo.py
@@ -46,7 +46,8 @@ def load_data(file: str, number_data: int) -> dict:
 ####################################################
 index_name = "marqo-podcast-search-demo"
 mq = marqo.Client(url='http://localhost:8882') # Connection to Marqo Docker Container
-mq.index(index_name).add_documents(podcast_data) # If the index doesn't exist, Marqo will create it
+mq.create_index(index_name)
+mq.index(index_name).add_documents(podcast_data)
 stats = mq.index(index_name).get_stats() # get the stats for the index
 print(f"{stats['numberOfDocuments']} documents added to index: {index_name}")
