Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove **kwargs from FARMReader #2413

Merged
merged 6 commits into from
Apr 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/linux_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,9 @@ jobs:
run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz && tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin

- name: Install tesseract
run: sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils
run: |
sudo apt update
sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils

- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
Expand Down
2 changes: 1 addition & 1 deletion docs/_src/api/api/reader.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ While the underlying model can vary (BERT, Roberta, DistilBERT, ...), the interf
#### \_\_init\_\_

```python
def __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, devices: List[torch.device] = [], no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, confidence_threshold: Optional[float] = None, proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str, bool]] = None, **kwargs, ,)
def __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, devices: List[torch.device] = [], no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, confidence_threshold: Optional[float] = None, proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str, bool]] = None)
```

**Arguments**:
Expand Down
2 changes: 1 addition & 1 deletion docs/_src/tutorials/tutorials/11.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ To find out more about these pipelines, have a look at our [documentation](https
With any Pipeline, whether prebuilt or custom constructed,
you can save a diagram showing how all the components are connected.

![image](https://user-images.githubusercontent.com/1563902/102451716-54813700-4039-11eb-881e-f3c01b47ca15.png)
![image](https://github.com/deepset-ai/haystack/blob/master/docs/img/retriever-reader-pipeline.png)


```python
Expand Down
5 changes: 4 additions & 1 deletion haystack/nodes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ def wrapper_exportable_to_yaml(self, *args, **kwargs):
self._component_config = {"params": {}, "type": type(self).__name__}

# Make sure it runs only on the __init__of the implementations, not in superclasses
if init_func.__qualname__ == f"{self.__class__.__name__}.{init_func.__name__}":
# NOTE: we use '.endswith' because inner classes's __qualname__ will include the parent class'
# name, like: ParentClass.InnerClass.__init__.
# Inner classes are heavily used in tests.
if init_func.__qualname__.endswith(f"{self.__class__.__name__}.{init_func.__name__}"):

# Store all the named input parameters in self._component_config
for k, v in kwargs.items():
Expand Down
2 changes: 0 additions & 2 deletions haystack/nodes/reader/farm.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def __init__(
local_files_only=False,
force_download=False,
use_auth_token: Optional[Union[str, bool]] = None,
**kwargs,
):

"""
Expand Down Expand Up @@ -140,7 +139,6 @@ def __init__(
force_download=force_download,
devices=self.devices,
use_auth_token=use_auth_token,
**kwargs,
)
self.inferencer.model.prediction_heads[0].context_window_size = context_window_size
self.inferencer.model.prediction_heads[0].no_ans_boost = no_ans_boost
Expand Down
2 changes: 1 addition & 1 deletion haystack/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ def _build_eval_dataframe(
df["eval_mode"] = "isolated" if "isolated" in field_name else "integrated"
partial_dfs.append(df)

return pd.concat(partial_dfs, ignore_index=True)
return pd.concat(partial_dfs, ignore_index=True).reset_index()

def get_next_nodes(self, node_id: str, stream_id: str):
current_node_edges = self.graph.edges(node_id, data=True)
Expand Down
3 changes: 2 additions & 1 deletion haystack/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def _optional_component_not_installed(component: str, dep_group: str, source_err
f"Failed to import '{component}', "
"which is an optional component in Haystack.\n"
f"Run 'pip install 'farm-haystack[{dep_group}]'' "
"to install the required dependencies and make this component available."
"to install the required dependencies and make this component available.\n"
f"(Original error: {str(source_error)})"
) from source_error


Expand Down
31 changes: 31 additions & 0 deletions test/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
SAMPLES_PATH,
MockDocumentStore,
MockRetriever,
MockNode,
deepset_cloud_fixture,
)

Expand Down Expand Up @@ -350,6 +351,36 @@ def __init__(self, document_store):
assert pipeline.get_document_store().base_parameter == "something"


def test_get_config_custom_node_with_params():
class CustomNode(MockNode):
def __init__(self, param: int):
super().__init__()
self.param = param

pipeline = Pipeline()
pipeline.add_node(CustomNode(param=10), name="custom_node", inputs=["Query"])

assert len(pipeline.get_config()["components"]) == 1
assert pipeline.get_config()["components"][0]["params"] == {"param": 10}


def test_get_config_custom_node_with_positional_params(caplog):
class CustomNode(MockNode):
def __init__(self, param: int = 1):
super().__init__()
self.param = param

pipeline = Pipeline()
with caplog.at_level(logging.WARNING):
pipeline.add_node(CustomNode(10), name="custom_node", inputs=["Query"])
assert (
"Unnamed __init__ parameters will not be saved to YAML "
"if Pipeline.save_to_yaml() is called" in caplog.text
)
assert len(pipeline.get_config()["components"]) == 1
assert pipeline.get_config()["components"][0]["params"] == {}


def test_generate_code_simple_pipeline():
config = {
"version": "unstable",
Expand Down
69 changes: 64 additions & 5 deletions test/test_pipeline_yaml.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from abc import abstractmethod
from numpy import mat
import pytest
import json
import logging
import inspect
import networkx as nx
from enum import Enum
Expand All @@ -11,6 +13,7 @@
from haystack.nodes import _json_schema
from haystack.nodes import FileTypeClassifier
from haystack.errors import HaystackError, PipelineConfigError, PipelineSchemaError
from haystack.nodes.base import BaseComponent

from .conftest import SAMPLES_PATH, MockNode, MockDocumentStore, MockReader, MockRetriever
from . import conftest
Expand Down Expand Up @@ -41,7 +44,9 @@ def mock_json_schema(request, monkeypatch, tmp_path):

# Generate mock schema in tmp_path
filename = f"haystack-pipeline-unstable.schema.json"
test_schema = _json_schema.get_json_schema(filename=filename, compatible_versions=["unstable"])
test_schema = _json_schema.get_json_schema(
filename=filename, compatible_versions=["unstable", haystack.__version__]
)

with open(tmp_path / filename, "w") as schema_file:
json.dump(test_schema, schema_file, indent=4)
Expand Down Expand Up @@ -274,11 +279,65 @@ def __init__(self, param: int):
assert pipeline.get_node("custom_node").param == 1


def test_load_yaml_custom_component_cant_be_abstract(tmp_path):
def test_load_yaml_custom_component_with_no_init(tmp_path):
class CustomNode(MockNode):
def __init__(self):
super().__init__()
pass

with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
tmp_file.write(
f"""
version: unstable
components:
- name: custom_node
type: CustomNode
pipelines:
- name: my_pipeline
nodes:
- name: custom_node
inputs:
- Query
"""
)
pipeline = Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
assert isinstance(pipeline.get_node("custom_node"), CustomNode)


def test_load_yaml_custom_component_neednt_call_super(tmp_path):
"""This is a side-effect. Here for behavior documentation only"""

class CustomNode(BaseComponent):
outgoing_edges = 1

def __init__(self, param: int):
self.param = param

def run(self, *a, **k):
pass

with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
tmp_file.write(
f"""
version: unstable
components:
- name: custom_node
type: CustomNode
params:
param: 1
pipelines:
- name: my_pipeline
nodes:
- name: custom_node
inputs:
- Query
"""
)
pipeline = Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
assert isinstance(pipeline.get_node("custom_node"), CustomNode)
assert pipeline.get_node("custom_node").param == 1


def test_load_yaml_custom_component_cant_be_abstract(tmp_path):
class CustomNode(MockNode):
@abstractmethod
def abstract_method(self):
pass
Expand Down Expand Up @@ -575,7 +634,7 @@ def __init__(self, some_exotic_parameter: str):
)
pipeline = Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
node = pipeline.get_node("custom_node")
node.some_exotic_parameter == "AnotherClass.CLASS_CONSTANT"
assert node.some_exotic_parameter == "AnotherClass.CLASS_CONSTANT"


def test_load_yaml_custom_component_with_superclass(tmp_path):
Expand Down