Use ElasticsearchDocumentStore.get_all_documents in `ElasticsearchF…

…ilterOnlyRetriever.retrieve` (#2151) * use get_all_documents in ElasticsearchFilterOnlyRetriever.retrieve * Update Documentation & Code Style * add test case for es_filter_only retriever * Update Documentation & Code Style * fix test by adding empty string for query * Update Documentation & Code Style * add explicit name of argument "query" Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Julian Risch <julian.risch@deepset.ai>
deepset-ai · Apr 25, 2022 · c401e86 · c401e86
1 parent 25475a6
commit c401e86
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 9 deletions.
diff --git a/docs/_src/api/api/retriever.md b/docs/_src/api/api/retriever.md
@@ -229,9 +229,9 @@ that are most relevant to the query.
 
 **Arguments**:
 
-- `query`: The query
+- `query`: Has no effect, can pass in empty string
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
-- `top_k`: How many documents to return per query.
+- `top_k`: Has no effect, pass in any int or None
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 - `headers`: Custom HTTP headers to pass to elasticsearch client (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='})
 Check out https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html for more information.

diff --git a/haystack/nodes/retriever/sparse.py b/haystack/nodes/retriever/sparse.py
@@ -157,20 +157,16 @@ def retrieve(
         Scan through documents in DocumentStore and return a small number of documents
         that are most relevant to the query.
 
-        :param query: The query
+        :param query: Has no effect, can pass in empty string
         :param filters: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
-        :param top_k: How many documents to return per query.
+        :param top_k: Has no effect, pass in any int or None
         :param index: The name of the index in the DocumentStore from which to retrieve documents
         :param headers: Custom HTTP headers to pass to elasticsearch client (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='})
                 Check out https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html for more information.
         """
-        if top_k is None:
-            top_k = self.top_k
         if index is None:
             index = self.document_store.index
-        documents = self.document_store.query(
-            query=None, filters=filters, top_k=top_k, custom_query=self.custom_query, index=index, headers=headers
-        )
+        documents = self.document_store.get_all_documents(filters=filters, index=index, headers=headers)
         return documents
 
 

diff --git a/test/test_retriever.py b/test/test_retriever.py
@@ -563,3 +563,25 @@ def test_embeddings_encoder_of_embedding_retriever_should_warn_about_model_forma
             "You may need to set 'model_format='sentence_transformers' to ensure correct loading of model."
             in caplog.text
         )
+
+
+@pytest.mark.parametrize("retriever", ["es_filter_only"], indirect=True)
+@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
+def test_es_filter_only(document_store, retriever):
+    docs = [
+        Document(content="Doc1", meta={"f1": "0"}),
+        Document(content="Doc2", meta={"f1": "0"}),
+        Document(content="Doc3", meta={"f1": "0"}),
+        Document(content="Doc4", meta={"f1": "0"}),
+        Document(content="Doc5", meta={"f1": "0"}),
+        Document(content="Doc6", meta={"f1": "0"}),
+        Document(content="Doc7", meta={"f1": "1"}),
+        Document(content="Doc8", meta={"f1": "0"}),
+        Document(content="Doc9", meta={"f1": "0"}),
+        Document(content="Doc10", meta={"f1": "0"}),
+        Document(content="Doc11", meta={"f1": "0"}),
+        Document(content="Doc12", meta={"f1": "0"}),
+    ]
+    document_store.write_documents(docs)
+    retrieved_docs = retriever.retrieve(query="", filters={"f1": ["0"]})
+    assert len(retrieved_docs) == 11