Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix milvus and faiss tests not running #3263

Merged
merged 9 commits into from
Sep 22, 2022
Merged
19 changes: 13 additions & 6 deletions haystack/json-schemas/haystack-pipeline-1.10.0rc0.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -5570,10 +5570,10 @@
"title": "Use Auth Token",
"anyOf": [
{
"type": "boolean"
"type": "string"
},
{
"type": "string"
"type": "boolean"
},
{
"type": "null"
Expand Down Expand Up @@ -5823,10 +5823,17 @@
"default": true,
"type": "boolean"
},
"return_all_scores": {
"title": "Return All Scores",
"default": false,
"type": "boolean"
"top_k": {
"title": "Top K",
"default": 1,
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
]
},
"task": {
"title": "Task",
Expand Down
69 changes: 50 additions & 19 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,32 +169,63 @@ def pytest_collection_modifyitems(config, items):
keywords.extend(i.split("-"))
else:
keywords.append(i)
for cur_doc_store in [
"elasticsearch",
"faiss",
"sql",
"memory",
"milvus1",
"milvus",
"weaviate",
"pinecone",
"opensearch",
]:
if keywords and cur_doc_store in keywords and cur_doc_store not in document_store_types_to_run:
skip_docstore = pytest.mark.skip(
reason=f'{cur_doc_store} is disabled. Enable via pytest --document_store_type="{cur_doc_store}"'
)
item.add_marker(skip_docstore)

if "milvus1" in keywords and not milvus1:

required_doc_store = infer_required_doc_store(item, keywords)

if required_doc_store not in document_store_types_to_run:
skip_docstore = pytest.mark.skip(
reason=f'{required_doc_store} is disabled. Enable via pytest --document_store_type="{required_doc_store}"'
)
item.add_marker(skip_docstore)

if "milvus1" == required_doc_store and not milvus1:
skip_milvus1 = pytest.mark.skip(reason="Skipping Tests for 'milvus1', as Milvus2 seems to be installed.")
item.add_marker(skip_milvus1)

elif "milvus" in keywords and milvus1:
elif "milvus" == required_doc_store and milvus1:
skip_milvus = pytest.mark.skip(reason="Skipping Tests for 'milvus', as Milvus1 seems to be installed.")
item.add_marker(skip_milvus)


def infer_required_doc_store(item, keywords):
    """Infer which document store a collected test item needs.

    Assumption: a test runs with only one document store. If the item carries
    several document-store markers, the following heuristics are applied in
    order:

    1. if the test was parameterized, use the document store named in the
       parameter id (``item.callspec.id``)
    2. if the test name contains a document store name, use that
    3. otherwise fall back to one of the markers

    :param item: the collected test item; ``item.name`` is read, and
        ``item.callspec.id`` too when the item has a ``callspec`` attribute.
    :param keywords: iterable of keyword/marker names attached to the item.
    :return: the name of the required document store, or ``None`` if the item
        has no document-store marker.
    """
    all_doc_stores = {
        "elasticsearch",
        "faiss",
        "sql",
        "memory",
        "milvus1",
        "milvus",
        "weaviate",
        "pinecone",
        "opensearch",
    }
    docstore_markers = set(keywords).intersection(all_doc_stores)

    # Matching below is substring based, and "milvus" is a prefix of "milvus1".
    # Iterating the set directly would make the winner depend on hash ordering,
    # so check candidates in a fixed order with longer names first: a more
    # specific name always beats a name it contains.
    ordered_doc_stores = sorted(all_doc_stores, key=lambda name: (-len(name), name))

    required_doc_store = None
    if len(docstore_markers) > 1:
        # if parameterized, infer the docstore from the parameter
        if hasattr(item, "callspec"):
            # callspec.id contains the parameter values of the test
            param_id = item.callspec.id
            required_doc_store = next(
                (doc_store for doc_store in ordered_doc_stores if doc_store in param_id), None
            )
        # if still not found, infer the docstore from the test name
        if required_doc_store is None:
            required_doc_store = next(
                (doc_store for doc_store in ordered_doc_stores if doc_store in item.name), None
            )

    # if still not found (or there is at most one marker), pick from the
    # markers; the fixed ordering keeps the choice reproducible between runs
    if required_doc_store is None:
        required_doc_store = next(
            (doc_store for doc_store in ordered_doc_stores if doc_store in docstore_markers), None
        )
    return required_doc_store


#
# Empty mocks, as a base for unit tests.
#
Expand Down
2 changes: 1 addition & 1 deletion test/document_stores/test_faiss_and_milvus.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ def test_cosine_similarity(document_store):
# now check if vectors are normalized when updating embeddings
class MockRetriever:
def embed_documents(self, docs):
return [np.random.rand(768).astype(np.float32) for doc in docs]
return np.random.rand(len(docs), 768).astype(np.float32)

retriever = MockRetriever()
document_store.update_embeddings(retriever=retriever)
Expand Down