diff --git a/docs/_src/api/api/retriever.md b/docs/_src/api/api/retriever.md
index ad6078caa6..34e2875eb3 100644
--- a/docs/_src/api/api/retriever.md
+++ b/docs/_src/api/api/retriever.md
@@ -28,13 +28,7 @@ Base class for regular retrievers.
 
 ```python
 @abstractmethod
-def retrieve(query: str,
-             filters: Optional[Dict[str, Union[Dict, List, str, int, float,
-                                               bool]]] = None,
-             top_k: Optional[int] = None,
-             index: str = None,
-             headers: Optional[Dict[str, str]] = None,
-             scale_score: bool = None) -> List[Document]
+def retrieve(query: str, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = None) -> List[Document]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -67,13 +61,7 @@ Wrapper method used to time functions.
 #### BaseRetriever.eval
 
 ```python
-def eval(label_index: str = "label",
-         doc_index: str = "eval_document",
-         label_origin: str = "gold-label",
-         top_k: int = 10,
-         open_domain: bool = False,
-         return_preds: bool = False,
-         headers: Optional[Dict[str, str]] = None) -> dict
+def eval(label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold-label", top_k: int = 10, open_domain: bool = False, return_preds: bool = False, headers: Optional[Dict[str, str]] = None) -> dict
 ```
 
 Performs evaluation on the Retriever.
@@ -122,11 +110,7 @@ class BM25Retriever(BaseRetriever)
 #### BM25Retriever.\_\_init\_\_
 
 ```python
-def __init__(document_store: KeywordDocumentStore,
-             top_k: int = 10,
-             all_terms_must_match: bool = False,
-             custom_query: Optional[str] = None,
-             scale_score: bool = True)
+def __init__(document_store: KeywordDocumentStore, top_k: int = 10, all_terms_must_match: bool = False, custom_query: Optional[str] = None, scale_score: bool = True)
 ```
 
 **Arguments**:
@@ -210,13 +194,7 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### BM25Retriever.retrieve
 
 ```python
-def retrieve(query: str,
-             filters: Optional[Dict[str, Union[Dict, List, str, int, float,
-                                               bool]]] = None,
-             top_k: Optional[int] = None,
-             index: str = None,
-             headers: Optional[Dict[str, str]] = None,
-             scale_score: bool = None) -> List[Document]
+def retrieve(query: str, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = None) -> List[Document]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -302,18 +280,12 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### BM25Retriever.retrieve\_batch
 
 ```python
-def retrieve_batch(queries: List[str],
-                   filters: Optional[Union[Dict[str, Union[Dict, List, str,
-                                                           int, float, bool]],
-                                           List[Dict[str,
-                                                     Union[Dict, List, str,
-                                                           int, float,
-                                                           bool]]], ]] = None,
-                   top_k: Optional[int] = None,
-                   index: str = None,
-                   headers: Optional[Dict[str, str]] = None,
-                   batch_size: Optional[int] = None,
-                   scale_score: bool = None) -> List[List[Document]]
+def retrieve_batch(queries: List[str], filters: Optional[
+            Union[
+                Dict[str, Union[Dict, List, str, int, float, bool]],
+                List[Dict[str, Union[Dict, List, str, int, float, bool]]],
+            ]
+        ] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, batch_size: Optional[int] = None, scale_score: bool = None) -> List[List[Document]]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -414,12 +386,7 @@ Helpful for benchmarking, testing and if you want to do QA on small documents wi
 #### FilterRetriever.retrieve
 
 ```python
-def retrieve(query: str,
-             filters: dict = None,
-             top_k: Optional[int] = None,
-             index: str = None,
-             headers: Optional[Dict[str, str]] = None,
-             scale_score: bool = None) -> List[Document]
+def retrieve(query: str, filters: dict = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = None) -> List[Document]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -458,9 +425,7 @@ It uses sklearn's TfidfVectorizer to compute a tf-idf matrix.
 #### TfidfRetriever.\_\_init\_\_
 
 ```python
-def __init__(document_store: BaseDocumentStore,
-             top_k: int = 10,
-             auto_fit=True)
+def __init__(document_store: BaseDocumentStore, top_k: int = 10, auto_fit=True)
 ```
 
 **Arguments**:
@@ -474,16 +439,12 @@ def __init__(document_store: BaseDocumentStore,
 #### TfidfRetriever.retrieve
 
 ```python
-def retrieve(query: str,
-             filters: Optional[Union[Dict[str, Union[Dict, List, str, int,
-                                                     float, bool]],
-                                     List[Dict[str,
-                                               Union[Dict, List, str, int,
-                                                     float, bool]]], ]] = None,
-             top_k: Optional[int] = None,
-             index: str = None,
-             headers: Optional[Dict[str, str]] = None,
-             scale_score: bool = None) -> List[Document]
+def retrieve(query: str, filters: Optional[
+            Union[
+                Dict[str, Union[Dict, List, str, int, float, bool]],
+                List[Dict[str, Union[Dict, List, str, int, float, bool]]],
+            ]
+        ] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = None) -> List[Document]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -505,14 +466,7 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### TfidfRetriever.retrieve\_batch
 
 ```python
-def retrieve_batch(queries: Union[str, List[str]],
-                   filters: Optional[Dict[str, Union[Dict, List, str, int,
-                                                     float, bool]]] = None,
-                   top_k: Optional[int] = None,
-                   index: str = None,
-                   headers: Optional[Dict[str, str]] = None,
-                   batch_size: Optional[int] = None,
-                   scale_score: bool = None) -> List[List[Document]]
+def retrieve_batch(queries: Union[str, List[str]], filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, batch_size: Optional[int] = None, scale_score: bool = None) -> List[List[Document]]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -565,25 +519,7 @@ Karpukhin, Vladimir, et al. (2020): "Dense Passage Retrieval for Open-Domain Que
 #### DensePassageRetriever.\_\_init\_\_
 
 ```python
-def __init__(document_store: BaseDocumentStore,
-             query_embedding_model: Union[
-                 Path, str] = "facebook/dpr-question_encoder-single-nq-base",
-             passage_embedding_model: Union[
-                 Path, str] = "facebook/dpr-ctx_encoder-single-nq-base",
-             model_version: Optional[str] = None,
-             max_seq_len_query: int = 64,
-             max_seq_len_passage: int = 256,
-             top_k: int = 10,
-             use_gpu: bool = True,
-             batch_size: int = 16,
-             embed_title: bool = True,
-             use_fast_tokenizers: bool = True,
-             similarity_function: str = "dot_product",
-             global_loss_buffer_size: int = 150000,
-             progress_bar: bool = True,
-             devices: Optional[List[Union[str, torch.device]]] = None,
-             use_auth_token: Optional[Union[str, bool]] = None,
-             scale_score: bool = True)
+def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "facebook/dpr-question_encoder-single-nq-base", passage_embedding_model: Union[Path, str] = "facebook/dpr-ctx_encoder-single-nq-base", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_title: bool = True, use_fast_tokenizers: bool = True, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None, scale_score: bool = True)
 ```
 
 Init the Retriever incl. the two encoder models from a local or remote model checkpoint.
@@ -651,13 +587,7 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### DensePassageRetriever.retrieve
 
 ```python
-def retrieve(query: str,
-             filters: Optional[Dict[str, Union[Dict, List, str, int, float,
-                                               bool]]] = None,
-             top_k: Optional[int] = None,
-             index: str = None,
-             headers: Optional[Dict[str, str]] = None,
-             scale_score: bool = None) -> List[Document]
+def retrieve(query: str, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = None) -> List[Document]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -741,18 +671,12 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### DensePassageRetriever.retrieve\_batch
 
 ```python
-def retrieve_batch(queries: List[str],
-                   filters: Optional[Union[Dict[str, Union[Dict, List, str,
-                                                           int, float, bool]],
-                                           List[Dict[str,
-                                                     Union[Dict, List, str,
-                                                           int, float,
-                                                           bool]]], ]] = None,
-                   top_k: Optional[int] = None,
-                   index: str = None,
-                   headers: Optional[Dict[str, str]] = None,
-                   batch_size: Optional[int] = None,
-                   scale_score: bool = None) -> List[List[Document]]
+def retrieve_batch(queries: List[str], filters: Optional[
+            Union[
+                Dict[str, Union[Dict, List, str, int, float, bool]],
+                List[Dict[str, Union[Dict, List, str, int, float, bool]]],
+            ]
+        ] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, batch_size: Optional[int] = None, scale_score: bool = None) -> List[List[Document]]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -878,36 +802,7 @@ Embeddings of documents / passages shape (batch_size, embedding_dim)
 #### DensePassageRetriever.train
 
 ```python
-def train(data_dir: str,
-          train_filename: str,
-          dev_filename: str = None,
-          test_filename: str = None,
-          max_samples: int = None,
-          max_processes: int = 128,
-          multiprocessing_strategy: Optional[str] = None,
-          dev_split: float = 0,
-          batch_size: int = 2,
-          embed_title: bool = True,
-          num_hard_negatives: int = 1,
-          num_positives: int = 1,
-          n_epochs: int = 3,
-          evaluate_every: int = 1000,
-          n_gpu: int = 1,
-          learning_rate: float = 1e-5,
-          epsilon: float = 1e-08,
-          weight_decay: float = 0.0,
-          num_warmup_steps: int = 100,
-          grad_acc_steps: int = 1,
-          use_amp: str = None,
-          optimizer_name: str = "AdamW",
-          optimizer_correct_bias: bool = True,
-          save_dir: str = "../saved_models/dpr",
-          query_encoder_save_dir: str = "query_encoder",
-          passage_encoder_save_dir: str = "passage_encoder",
-          checkpoint_root_dir: Path = Path("model_checkpoints"),
-          checkpoint_every: Optional[int] = None,
-          checkpoints_to_keep: int = 3,
-          early_stopping: Optional[EarlyStopping] = None)
+def train(data_dir: str, train_filename: str, dev_filename: str = None, test_filename: str = None, max_samples: int = None, max_processes: int = 128, multiprocessing_strategy: Optional[str] = None, dev_split: float = 0, batch_size: int = 2, embed_title: bool = True, num_hard_negatives: int = 1, num_positives: int = 1, n_epochs: int = 3, evaluate_every: int = 1000, n_gpu: int = 1, learning_rate: float = 1e-5, epsilon: float = 1e-08, weight_decay: float = 0.0, num_warmup_steps: int = 100, grad_acc_steps: int = 1, use_amp: str = None, optimizer_name: str = "AdamW", optimizer_correct_bias: bool = True, save_dir: str = "../saved_models/dpr", query_encoder_save_dir: str = "query_encoder", passage_encoder_save_dir: str = "passage_encoder", checkpoint_root_dir: Path = Path("model_checkpoints"), checkpoint_every: Optional[int] = None, checkpoints_to_keep: int = 3, early_stopping: Optional[EarlyStopping] = None)
 ```
 
 train a DensePassageRetrieval model
@@ -961,9 +856,7 @@ If any checkpoints are stored, a subsequent run of train() will resume training
 #### DensePassageRetriever.save
 
 ```python
-def save(save_dir: Union[Path, str],
-         query_encoder_dir: str = "query_encoder",
-         passage_encoder_dir: str = "passage_encoder")
+def save(save_dir: Union[Path, str], query_encoder_dir: str = "query_encoder", passage_encoder_dir: str = "passage_encoder")
 ```
 
 Save DensePassageRetriever to the specified directory.
@@ -984,18 +877,7 @@ None
 
 ```python
 @classmethod
-def load(cls,
-         load_dir: Union[Path, str],
-         document_store: BaseDocumentStore,
-         max_seq_len_query: int = 64,
-         max_seq_len_passage: int = 256,
-         use_gpu: bool = True,
-         batch_size: int = 16,
-         embed_title: bool = True,
-         use_fast_tokenizers: bool = True,
-         similarity_function: str = "dot_product",
-         query_encoder_dir: str = "query_encoder",
-         passage_encoder_dir: str = "passage_encoder")
+def load(cls, load_dir: Union[Path, str], document_store: BaseDocumentStore, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, use_gpu: bool = True, batch_size: int = 16, embed_title: bool = True, use_fast_tokenizers: bool = True, similarity_function: str = "dot_product", query_encoder_dir: str = "query_encoder", passage_encoder_dir: str = "passage_encoder")
 ```
 
 Load DensePassageRetriever from the specified directory.
@@ -1019,30 +901,7 @@ Kostić, Bogdan, et al. (2021): "Multi-modal Retrieval of Tables and Texts Using
 #### TableTextRetriever.\_\_init\_\_
 
 ```python
-def __init__(
-        document_store: BaseDocumentStore,
-        query_embedding_model: Union[
-            Path, str] = "deepset/bert-small-mm_retrieval-question_encoder",
-        passage_embedding_model: Union[
-            Path, str] = "deepset/bert-small-mm_retrieval-passage_encoder",
-        table_embedding_model: Union[
-            Path, str] = "deepset/bert-small-mm_retrieval-table_encoder",
-        model_version: Optional[str] = None,
-        max_seq_len_query: int = 64,
-        max_seq_len_passage: int = 256,
-        max_seq_len_table: int = 256,
-        top_k: int = 10,
-        use_gpu: bool = True,
-        batch_size: int = 16,
-        embed_meta_fields: List[str] = ["name", "section_title", "caption"],
-        use_fast_tokenizers: bool = True,
-        similarity_function: str = "dot_product",
-        global_loss_buffer_size: int = 150000,
-        progress_bar: bool = True,
-        devices: Optional[List[Union[str, torch.device]]] = None,
-        use_auth_token: Optional[Union[str, bool]] = None,
-        scale_score: bool = True,
-        use_fast: bool = True)
+def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-question_encoder", passage_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-passage_encoder", table_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-table_encoder", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, max_seq_len_table: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_meta_fields: List[str] = ["name", "section_title", "caption"], use_fast_tokenizers: bool = True, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None, scale_score: bool = True, use_fast: bool = True)
 ```
 
 Init the Retriever incl. the two encoder models from a local or remote model checkpoint.
@@ -1097,18 +956,12 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### TableTextRetriever.retrieve\_batch
 
 ```python
-def retrieve_batch(queries: List[str],
-                   filters: Optional[Union[Dict[str, Union[Dict, List, str,
-                                                           int, float, bool]],
-                                           List[Dict[str,
-                                                     Union[Dict, List, str,
-                                                           int, float,
-                                                           bool]]], ]] = None,
-                   top_k: Optional[int] = None,
-                   index: str = None,
-                   headers: Optional[Dict[str, str]] = None,
-                   batch_size: Optional[int] = None,
-                   scale_score: bool = None) -> List[List[Document]]
+def retrieve_batch(queries: List[str], filters: Optional[
+            Union[
+                Dict[str, Union[Dict, List, str, int, float, bool]],
+                List[Dict[str, Union[Dict, List, str, int, float, bool]]],
+            ]
+        ] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, batch_size: Optional[int] = None, scale_score: bool = None) -> List[List[Document]]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -1237,38 +1090,7 @@ Embeddings of documents / passages. Shape: (batch_size, embedding_dim)
 #### TableTextRetriever.train
 
 ```python
-def train(data_dir: str,
-          train_filename: str,
-          dev_filename: str = None,
-          test_filename: str = None,
-          max_samples: int = None,
-          max_processes: int = 128,
-          dev_split: float = 0,
-          batch_size: int = 2,
-          embed_meta_fields: List[str] = [
-              "page_title", "section_title", "caption"
-          ],
-          num_hard_negatives: int = 1,
-          num_positives: int = 1,
-          n_epochs: int = 3,
-          evaluate_every: int = 1000,
-          n_gpu: int = 1,
-          learning_rate: float = 1e-5,
-          epsilon: float = 1e-08,
-          weight_decay: float = 0.0,
-          num_warmup_steps: int = 100,
-          grad_acc_steps: int = 1,
-          use_amp: str = None,
-          optimizer_name: str = "AdamW",
-          optimizer_correct_bias: bool = True,
-          save_dir: str = "../saved_models/mm_retrieval",
-          query_encoder_save_dir: str = "query_encoder",
-          passage_encoder_save_dir: str = "passage_encoder",
-          table_encoder_save_dir: str = "table_encoder",
-          checkpoint_root_dir: Path = Path("model_checkpoints"),
-          checkpoint_every: Optional[int] = None,
-          checkpoints_to_keep: int = 3,
-          early_stopping: Optional[EarlyStopping] = None)
+def train(data_dir: str, train_filename: str, dev_filename: str = None, test_filename: str = None, max_samples: int = None, max_processes: int = 128, dev_split: float = 0, batch_size: int = 2, embed_meta_fields: List[str] = ["page_title", "section_title", "caption"], num_hard_negatives: int = 1, num_positives: int = 1, n_epochs: int = 3, evaluate_every: int = 1000, n_gpu: int = 1, learning_rate: float = 1e-5, epsilon: float = 1e-08, weight_decay: float = 0.0, num_warmup_steps: int = 100, grad_acc_steps: int = 1, use_amp: str = None, optimizer_name: str = "AdamW", optimizer_correct_bias: bool = True, save_dir: str = "../saved_models/mm_retrieval", query_encoder_save_dir: str = "query_encoder", passage_encoder_save_dir: str = "passage_encoder", table_encoder_save_dir: str = "table_encoder", checkpoint_root_dir: Path = Path("model_checkpoints"), checkpoint_every: Optional[int] = None, checkpoints_to_keep: int = 3, early_stopping: Optional[EarlyStopping] = None)
 ```
 
 Train a TableTextRetrieval model.
@@ -1322,10 +1144,7 @@ checkpoint, a subdirectory with the name epoch_{epoch_num}_step_{step_num} is cr
 #### TableTextRetriever.save
 
 ```python
-def save(save_dir: Union[Path, str],
-         query_encoder_dir: str = "query_encoder",
-         passage_encoder_dir: str = "passage_encoder",
-         table_encoder_dir: str = "table_encoder")
+def save(save_dir: Union[Path, str], query_encoder_dir: str = "query_encoder", passage_encoder_dir: str = "passage_encoder", table_encoder_dir: str = "table_encoder")
 ```
 
 Save TableTextRetriever to the specified directory.
@@ -1347,20 +1166,7 @@ None
 
 ```python
 @classmethod
-def load(cls,
-         load_dir: Union[Path, str],
-         document_store: BaseDocumentStore,
-         max_seq_len_query: int = 64,
-         max_seq_len_passage: int = 256,
-         max_seq_len_table: int = 256,
-         use_gpu: bool = True,
-         batch_size: int = 16,
-         embed_meta_fields: List[str] = ["name", "section_title", "caption"],
-         use_fast_tokenizers: bool = True,
-         similarity_function: str = "dot_product",
-         query_encoder_dir: str = "query_encoder",
-         passage_encoder_dir: str = "passage_encoder",
-         table_encoder_dir: str = "table_encoder")
+def load(cls, load_dir: Union[Path, str], document_store: BaseDocumentStore, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, max_seq_len_table: int = 256, use_gpu: bool = True, batch_size: int = 16, embed_meta_fields: List[str] = ["name", "section_title", "caption"], use_fast_tokenizers: bool = True, similarity_function: str = "dot_product", query_encoder_dir: str = "query_encoder", passage_encoder_dir: str = "passage_encoder", table_encoder_dir: str = "table_encoder")
 ```
 
 Load TableTextRetriever from the specified directory.
@@ -1378,21 +1184,7 @@ class EmbeddingRetriever(BaseRetriever)
 #### EmbeddingRetriever.\_\_init\_\_
 
 ```python
-def __init__(document_store: BaseDocumentStore,
-             embedding_model: str,
-             model_version: Optional[str] = None,
-             use_gpu: bool = True,
-             batch_size: int = 32,
-             max_seq_len: int = 512,
-             model_format: Optional[str] = None,
-             pooling_strategy: str = "reduce_mean",
-             emb_extraction_layer: int = -1,
-             top_k: int = 10,
-             progress_bar: bool = True,
-             devices: Optional[List[Union[str, torch.device]]] = None,
-             use_auth_token: Optional[Union[str, bool]] = None,
-             scale_score: bool = True,
-             embed_meta_fields: List[str] = [])
+def __init__(document_store: BaseDocumentStore, embedding_model: str, model_version: Optional[str] = None, use_gpu: bool = True, batch_size: int = 32, max_seq_len: int = 512, model_format: Optional[str] = None, pooling_strategy: str = "reduce_mean", emb_extraction_layer: int = -1, top_k: int = 10, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None, scale_score: bool = True, embed_meta_fields: List[str] = [])
 ```
 
 **Arguments**:
@@ -1447,13 +1239,7 @@ performance if your titles contain meaningful information for retrieval
 #### EmbeddingRetriever.retrieve
 
 ```python
-def retrieve(query: str,
-             filters: Optional[Dict[str, Union[Dict, List, str, int, float,
-                                               bool]]] = None,
-             top_k: Optional[int] = None,
-             index: str = None,
-             headers: Optional[Dict[str, str]] = None,
-             scale_score: bool = None) -> List[Document]
+def retrieve(query: str, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = None) -> List[Document]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -1537,18 +1323,12 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### EmbeddingRetriever.retrieve\_batch
 
 ```python
-def retrieve_batch(queries: List[str],
-                   filters: Optional[Union[Dict[str, Union[Dict, List, str,
-                                                           int, float, bool]],
-                                           List[Dict[str,
-                                                     Union[Dict, List, str,
-                                                           int, float,
-                                                           bool]]], ]] = None,
-                   top_k: Optional[int] = None,
-                   index: str = None,
-                   headers: Optional[Dict[str, str]] = None,
-                   batch_size: Optional[int] = None,
-                   scale_score: bool = None) -> List[List[Document]]
+def retrieve_batch(queries: List[str], filters: Optional[
+            Union[
+                Dict[str, Union[Dict, List, str, int, float, bool]],
+                List[Dict[str, Union[Dict, List, str, int, float, bool]]],
+            ]
+        ] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, batch_size: Optional[int] = None, scale_score: bool = None) -> List[List[Document]]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -1674,11 +1454,7 @@ Embeddings, one per input document
 #### EmbeddingRetriever.train
 
 ```python
-def train(training_data: List[Dict[str, Any]],
-          learning_rate: float = 2e-5,
-          n_epochs: int = 1,
-          num_warmup_steps: int = None,
-          batch_size: int = 16) -> None
+def train(training_data: List[Dict[str, Any]], learning_rate: float = 2e-5, n_epochs: int = 1, num_warmup_steps: int = None, batch_size: int = 16, train_loss: str = "mnrl") -> None
 ```
 
 Trains/adapts the underlying embedding model.
@@ -1697,6 +1473,8 @@ Each training data example is a dictionary with the following keys:
 - `n_epochs` (`int`): The number of epochs
 - `num_warmup_steps` (`int`): The number of warmup steps
 - `batch_size` (`int (optional)`): The batch size to use for the training, defaults to 16
+- `train_loss` (`str (optional)`): The loss to use for training.
+If using sentence-transformers, one of 'mnrl' (Multiple Negatives Ranking Loss) or 'margin_mse' (MarginMSE)
 
 <a id="dense.EmbeddingRetriever.save"></a>
 
@@ -1731,22 +1509,7 @@ Xiong, Wenhan, et. al. (2020): "Answering complex open-domain questions with mul
 #### MultihopEmbeddingRetriever.\_\_init\_\_
 
 ```python
-def __init__(document_store: BaseDocumentStore,
-             embedding_model: str,
-             model_version: Optional[str] = None,
-             num_iterations: int = 2,
-             use_gpu: bool = True,
-             batch_size: int = 32,
-             max_seq_len: int = 512,
-             model_format: str = "farm",
-             pooling_strategy: str = "reduce_mean",
-             emb_extraction_layer: int = -1,
-             top_k: int = 10,
-             progress_bar: bool = True,
-             devices: Optional[List[Union[str, torch.device]]] = None,
-             use_auth_token: Optional[Union[str, bool]] = None,
-             scale_score: bool = True,
-             embed_meta_fields: List[str] = [])
+def __init__(document_store: BaseDocumentStore, embedding_model: str, model_version: Optional[str] = None, num_iterations: int = 2, use_gpu: bool = True, batch_size: int = 32, max_seq_len: int = 512, model_format: str = "farm", pooling_strategy: str = "reduce_mean", emb_extraction_layer: int = -1, top_k: int = 10, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None, scale_score: bool = True, embed_meta_fields: List[str] = [])
 ```
 
 **Arguments**:
@@ -1802,13 +1565,7 @@ performance if your titles contain meaningful information for retrieval
 #### MultihopEmbeddingRetriever.retrieve
 
 ```python
-def retrieve(query: str,
-             filters: Optional[Dict[str, Union[Dict, List, str, int, float,
-                                               bool]]] = None,
-             top_k: Optional[int] = None,
-             index: str = None,
-             headers: Optional[Dict[str, str]] = None,
-             scale_score: bool = None) -> List[Document]
+def retrieve(query: str, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, scale_score: bool = None) -> List[Document]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -1892,18 +1649,12 @@ Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.
 #### MultihopEmbeddingRetriever.retrieve\_batch
 
 ```python
-def retrieve_batch(queries: List[str],
-                   filters: Optional[Union[Dict[str, Union[Dict, List, str,
-                                                           int, float, bool]],
-                                           List[Dict[str,
-                                                     Union[Dict, List, str,
-                                                           int, float,
-                                                           bool]]], ]] = None,
-                   top_k: Optional[int] = None,
-                   index: str = None,
-                   headers: Optional[Dict[str, str]] = None,
-                   batch_size: Optional[int] = None,
-                   scale_score: bool = None) -> List[List[Document]]
+def retrieve_batch(queries: List[str], filters: Optional[
+            Union[
+                Dict[str, Union[Dict, List, str, int, float, bool]],
+                List[Dict[str, Union[Dict, List, str, int, float, bool]]],
+            ]
+        ] = None, top_k: Optional[int] = None, index: str = None, headers: Optional[Dict[str, str]] = None, batch_size: Optional[int] = None, scale_score: bool = None) -> List[List[Document]]
 ```
 
 Scan through documents in DocumentStore and return a small number documents
@@ -2010,10 +1761,7 @@ The generated SPARQL query is executed on a knowledge graph.
 #### Text2SparqlRetriever.\_\_init\_\_
 
 ```python
-def __init__(knowledge_graph,
-             model_name_or_path,
-             top_k: int = 1,
-             use_auth_token: Optional[Union[str, bool]] = None)
+def __init__(knowledge_graph, model_name_or_path, top_k: int = 1, use_auth_token: Optional[Union[str, bool]] = None)
 ```
 
 Init the Retriever by providing a knowledge graph and a pre-trained BART model
diff --git a/haystack/nodes/retriever/_embedding_encoder.py b/haystack/nodes/retriever/_embedding_encoder.py
index 366358555e..21636850e7 100644
--- a/haystack/nodes/retriever/_embedding_encoder.py
+++ b/haystack/nodes/retriever/_embedding_encoder.py
@@ -23,6 +23,12 @@
 logger = logging.getLogger(__name__)
 
 
+_TRAINING_LOSSES: Dict[str, Callable] = {  # `train_loss` name -> loss class, instantiated with the embedding model
+    "mnrl": losses.MultipleNegativesRankingLoss,
+    "margin_mse": losses.MarginMSELoss,
+}
+
+
 class _BaseEmbeddingEncoder:
     @abstractmethod
     def embed_queries(self, texts: List[str]) -> List[np.ndarray]:
@@ -195,14 +201,36 @@ def train(
         n_epochs: int = 1,
         num_warmup_steps: int = None,
         batch_size: int = 16,
+        train_loss: str = "mnrl",
     ):
 
-        train_examples = [
-            InputExample(texts=[i["question"], i["pos_doc"], i["neg_doc"]], label=i["score"]) for i in training_data
-        ]
-        logger.info(f"GPL training/adapting {self.embedding_model} with {len(train_examples)} examples")
+        # Reject unknown loss names up front instead of failing with a KeyError once the examples are built
+        if train_loss not in _TRAINING_LOSSES:
+            raise ValueError(f"Unrecognized train_loss {train_loss}. Should be one of: {list(_TRAINING_LOSSES)}")
+
+        train_examples = []
+        for i in training_data:
+            texts = [i["question"], i["pos_doc"]]
+            # Negative docs are optional for 'mnrl', but 'margin_mse' needs a (question, pos_doc, neg_doc) triplet
+            if "neg_doc" in i:
+                texts.append(i["neg_doc"])
+            elif train_loss == "margin_mse":
+                raise ValueError(
+                    "Some training examples don't contain the 'neg_doc' field which is necessary when using 'margin_mse' loss."
+                )
+            # The label is only attached when a 'score' is provided; 'margin_mse' cannot train without it
+            if "score" not in i:
+                if train_loss == "margin_mse":
+                    raise ValueError(
+                        "Some training examples don't contain the 'score' field which is necessary when using 'margin_mse' loss."
+                    )
+                train_examples.append(InputExample(texts=texts))
+            else:
+                train_examples.append(InputExample(texts=texts, label=i["score"]))
+
+        logger.info(f"Training/adapting {self.embedding_model} with {len(train_examples)} examples")
         train_dataloader = DataLoader(train_examples, batch_size=batch_size, drop_last=True, shuffle=True)
-        train_loss = losses.MarginMSELoss(self.embedding_model)
+        train_loss = _TRAINING_LOSSES[train_loss](self.embedding_model)
 
         # Tune the model
         self.embedding_model.fit(
diff --git a/haystack/nodes/retriever/dense.py b/haystack/nodes/retriever/dense.py
index 3ae3829d42..303b7add5f 100644
--- a/haystack/nodes/retriever/dense.py
+++ b/haystack/nodes/retriever/dense.py
@@ -1863,6 +1863,7 @@ def train(
         n_epochs: int = 1,
         num_warmup_steps: int = None,
         batch_size: int = 16,
+        train_loss: str = "mnrl",
     ) -> None:
         """
         Trains/adapts the underlying embedding model.
@@ -1885,6 +1886,9 @@ def train(
         :type num_warmup_steps: int
         :param batch_size: The batch size to use for the training, defaults to 16
         :type batch_size: int (optional)
+        :param train_loss: The loss to use for training.
+                           If using sentence-transformers, one of 'mnrl' (Multiple Negatives Ranking Loss) or 'margin_mse' (MarginMSE)
+        :type train_loss: str (optional)
         """
         self.embedding_encoder.train(
             training_data,
@@ -1892,6 +1896,7 @@ def train(
             n_epochs=n_epochs,
             num_warmup_steps=num_warmup_steps,
             batch_size=batch_size,
+            train_loss=train_loss,
         )
 
     def save(self, save_dir: Union[Path, str]) -> None: