deepset-ai · tstadel · Sep 22, 2022 · Sep 21, 2022 · Sep 21, 2022 · Sep 22, 2022
@@ -5570,10 +5570,10 @@
               "title": "Use Auth Token",
               "anyOf": [
                 {
-                  "type": "boolean"
+                  "type": "string"
                 },
                 {
-                  "type": "string"
+                  "type": "boolean"
                 },
                 {
                   "type": "null"
@@ -5823,10 +5823,17 @@
               "default": true,
               "type": "boolean"
             },
-            "return_all_scores": {
-              "title": "Return All Scores",
-              "default": false,
-              "type": "boolean"
+            "top_k": {
+              "title": "Top K",
+              "default": 1,
+              "anyOf": [
+                {
+                  "type": "integer"
+                },
+                {
+                  "type": "null"
+                }
+              ]
             },
             "task": {
               "title": "Task",

@@ -169,32 +169,63 @@ def pytest_collection_modifyitems(config, items):
                 keywords.extend(i.split("-"))
             else:
                 keywords.append(i)
-        for cur_doc_store in [
-            "elasticsearch",
-            "faiss",
-            "sql",
-            "memory",
-            "milvus1",
-            "milvus",
-            "weaviate",
-            "pinecone",
-            "opensearch",
-        ]:
-            if keywords and cur_doc_store in keywords and cur_doc_store not in document_store_types_to_run:
-                skip_docstore = pytest.mark.skip(
-                    reason=f'{cur_doc_store} is disabled. Enable via pytest --document_store_type="{cur_doc_store}"'
-                )
-                item.add_marker(skip_docstore)
-
-        if "milvus1" in keywords and not milvus1:
+
+        required_doc_store = infer_required_doc_store(item, keywords)
+
+        if required_doc_store not in document_store_types_to_run:
+            skip_docstore = pytest.mark.skip(
+                reason=f'{required_doc_store} is disabled. Enable via pytest --document_store_type="{required_doc_store}"'
+            )
+            item.add_marker(skip_docstore)
+
+        if "milvus1" == required_doc_store and not milvus1:
             skip_milvus1 = pytest.mark.skip(reason="Skipping Tests for 'milvus1', as Milvus2 seems to be installed.")
             item.add_marker(skip_milvus1)
 
-        elif "milvus" in keywords and milvus1:
+        elif "milvus" == required_doc_store and milvus1:
             skip_milvus = pytest.mark.skip(reason="Skipping Tests for 'milvus', as Milvus1 seems to be installed.")
             item.add_marker(skip_milvus)
 
 
+def infer_required_doc_store(item, keywords):
+    # assumption: a test runs only with one document_store
+    # if there are multiple docstore markers, we apply the following heuristics:
+    # 1. if the test was parameterized, we use the the parameter
+    # 2. if the test name contains the docstore name, we use that
+    # 3. use an arbitrary one by calling set.pop()
+    required_doc_store = None
+    all_doc_stores = {
+        "elasticsearch",
+        "faiss",
+        "sql",
+        "memory",
+        "milvus1",
+        "milvus",
+        "weaviate",
+        "pinecone",
+        "opensearch",
+    }
+    docstore_markers = set(keywords).intersection(all_doc_stores)
+    if len(docstore_markers) > 1:
+        # if parameterized infer the docstore from the parameter
+        if hasattr(item, "callspec"):
+            for doc_store in all_doc_stores:
+                # callspec.id contains the parameter values of the test
+                if doc_store in item.callspec.id:
+                    required_doc_store = doc_store
+                    break
+            # if still not found, infer the docstore from the test name
+        if required_doc_store is None:
+            for doc_store in all_doc_stores:
+                if doc_store in item.name:
+                    required_doc_store = doc_store
+                    break
+        # if still not found, use an arbitrary one
+    if required_doc_store is None:
+        required_doc_store = docstore_markers.pop() if docstore_markers else None
+    return required_doc_store
+
+
 #
 # Empty mocks, as a base for unit tests.
 #

@@ -518,7 +518,7 @@ def test_cosine_similarity(document_store):
     # now check if vectors are normalized when updating embeddings
     class MockRetriever:
         def embed_documents(self, docs):
-            return [np.random.rand(768).astype(np.float32) for doc in docs]
+            return np.random.rand(len(docs), 768).astype(np.float32)
 
     retriever = MockRetriever()
     document_store.update_embeddings(retriever=retriever)