Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Example scripts updated for breaking changes #529

Merged
merged 1 commit into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/GPT-examples/ironman.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
}
}

# create the index - if no settings are present then sensible deffaults are used
# create the index - if no settings are present then sensible defaults are used
mq.create_index(index_name, settings_dict=index_settings)
res = mq.index(index_name).add_documents(documents)

Expand Down
3 changes: 3 additions & 0 deletions examples/GPT3NewsSummary/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ DOC_INDEX_NAME = ''news-index'
print('Establishing connection to marqo client.')
mq = marqo.Client(url='http://localhost:8882')
print('creating a Marqo index')
mq.create_index(DOC_INDEX_NAME)
print('Indexing documents')
mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
```
Expand Down
3 changes: 3 additions & 0 deletions examples/GPT3NewsSummary/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
except KeyboardInterrupt:
raise
except:
print('Creating index')
mq.create_index(DOC_INDEX_NAME)

print('Indexing documents')
mq.index(DOC_INDEX_NAME).add_documents(MARQO_DOCUMENTS)
print('Done')
Expand Down
2 changes: 1 addition & 1 deletion examples/ImageSearchGuide/ImageSearchGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ output:

Add the documents to the previously created index using the `add_documents()` function:
```python
mq.index(index_name).add_documents(documents, device="cpu", processes=1, client_batch_size= 1)
mq.index(index_name).add_documents(documents, device="cpu", client_batch_size= 1)
```
```python
outputs:
Expand Down
2 changes: 1 addition & 1 deletion examples/ImageSearchGuide/imagesearchguide.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@
}
],
"source": [
"mq.index(index_name).add_documents(documents, device=\"cpu\", processes=1, client_batch_size= 1)"
"mq.index(index_name).add_documents(documents, device=\"cpu\", client_batch_size= 1)"
]
},
{
Expand Down
5 changes: 2 additions & 3 deletions examples/ImageSearchLocalization/article.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,8 @@ for patch_method in patch_methods:

response = client.create_index(index_name, settings_dict=settings)

# index the documents on the GPU using multiple processes
response = client.index(index_name).add_documents(documents, device='cuda',
server_batch_size=50, processes=2)
# index the documents on the GPU
response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=50)
```

If no GPU is available, set device='cpu'. 
Expand Down
6 changes: 2 additions & 4 deletions examples/ImageSearchLocalization/index_all_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@
index_name_prefix = "visual-search"
patch_methods = ["dino/v1", None, "yolox"] #["dino/v1", "dino/v2", "frcnn", None, "yolox"]
model_name = "ViT-B/32"
n_processes = 3
batch_size = 50
batch_size = 24

# set this to False if you do not want to delete the previous index of the same name
delete_index = True
Expand Down Expand Up @@ -81,5 +80,4 @@
response = client.create_index(index_name, settings_dict=settings)


response = client.index(index_name).add_documents(documents, device='cuda',
server_batch_size=batch_size, processes=n_processes)
response = client.index(index_name).add_documents(documents, device='cuda', client_batch_size=batch_size)
3 changes: 0 additions & 3 deletions examples/SimpleWiki/simple_wiki_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,6 @@ def split_big_docs(data, field='content', char_len=5e4):

device = 'cpu'

# here we use parallel indexing to speed up the task
# Note: to use multiprocessing you will want at least 8GB of RAM and the maximum number
# of processes that can be supported will be system dependent.
responses = client.index(index_name).add_documents(data, device=device, client_batch_size=20)

# optionally take a look at the responses
Expand Down
3 changes: 1 addition & 2 deletions examples/SpeechProcessing/SpeechSearch/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ def index_transciptions(
annotated_transcriptions,
non_tensor_fields=non_tensor_fields,
device=device,
client_batch_size=batch_size,
server_batch_size=batch_size,
client_batch_size=batch_size
)

return response
3 changes: 1 addition & 2 deletions examples/SpeechProcessing/article/article.md
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,7 @@ def index_transciptions(
annotated_transcriptions,
non_tensor_fields=non_tensor_fields,
device=device,
client_batch_size=batch_size,
server_batch_size=batch_size,
client_batch_size=batch_size
)

return response
Expand Down
6 changes: 2 additions & 4 deletions examples/StableDiffusion/hot-dog-100k.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ settings = {
"treat_urls_and_pointers_as_images": True,
}
client.create_index("hot-dogs-100k", **settings)
responses = client.index("hot-dogs-100k").add_documents(documents,
device="cuda", processes=4, batch_size=50)
responses = client.index("hot-dogs-100k").add_documents(documents, device="cuda", client_batch_size=50)

```
Check we have our images in the index:
Expand Down Expand Up @@ -163,8 +162,7 @@ We have now calculated scores for the different categories described previously.

```python
documents_image_docker = [doc.pop('image_docker') for doc in documents]
responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
processes=3, batch_size=50)
responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)
```

## Animating the hot-dog 100k dataset
Expand Down
8 changes: 3 additions & 5 deletions examples/StableDiffusion/hot-dog-100k.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,8 @@
}
client.create_index(index_name, **settings)

# here we use parallel indexing to speed up the task - a gpu is recomended (device='cuda')
responses = client.index(index_name).add_documents(documents, device='cpu'
, processes=4, batch_size=50)
# Here we index the documents. A GPU is recommended (device='cuda')
responses = client.index(index_name).add_documents(documents, device='cpu', client_batch_size=50)


#####################################################
Expand Down Expand Up @@ -101,8 +100,7 @@
doc[lab.replace(' ','_')] = [r['_score'] for r in responses['hits'] if r['label'] == lab][0]

documents_image_docker = [doc.pop('image_docker') for doc in documents]
responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu',
processes=3, batch_size=50)
responses = client.index("hot-dogs-100k").add_documents(documents, device='cpu', client_batch_size=50)

#####################################################
### Step 4. Remove the black images
Expand Down
3 changes: 2 additions & 1 deletion examples/podcast-search/podcast_search_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def load_data(file: str, number_data: int) -> dict:
####################################################
index_name = "marqo-podcast-search-demo"
mq = marqo.Client(url='http://localhost:8882') # Connection to Marqo Docker Container
mq.index(index_name).add_documents(podcast_data) # If the index doesn't exist, Marqo will create it
mq.create_index(index_name)
mq.index(index_name).add_documents(podcast_data)
stats = mq.index(index_name).get_stats() # get the stats for the index
print(f"{stats['numberOfDocuments']} documents added to index: {index_name}")

Expand Down