Reduce precision in pipeline eval print functions (#943)
A proposal to reduce the precision shown by `EvalRetriever.print` and `EvalReader.print` to four decimal places, using Python's `:.4f` format spec. If the user wants the full precision, they can access the class attributes directly.

Before
```
Retriever
-----------------
has_answer recall: 0.8739495798319328 (208/238)
no_answer recall:  1.00 (120/120) (no_answer samples are always treated as correctly retrieved)
recall: 0.9162011173184358 (328 / 358)
```

After
```
Retriever
-----------------
has_answer recall: 0.8739 (208/238)
no_answer recall:  1.00 (120/120) (no_answer samples are always treated as correctly retrieved)
recall: 0.9162 (328 / 358)
```
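The rounding comes from Python's fixed-point format specifier rather than any new helper. A minimal, runnable sketch, using the recall value from the example above:

```python
# Python's `:.4f` format spec renders a float rounded to four digits
# after the decimal point; the underlying value is unchanged.
recall = 0.9162011173184358

print(f"recall: {recall}")      # recall: 0.9162011173184358
print(f"recall: {recall:.4f}")  # recall: 0.9162
```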
lewtun authored Apr 6, 2021
1 parent 41a1c83 commit 8894c4f
1 changed file: haystack/eval.py (11 additions, 11 deletions)
```diff
@@ -87,10 +87,10 @@ def print(self):
         print("-----------------")
         if self.no_answer_count:
             print(
-                f"has_answer recall: {self.has_answer_recall} ({self.has_answer_correct}/{self.has_answer_count})")
+                f"has_answer recall: {self.has_answer_recall:.4f} ({self.has_answer_correct}/{self.has_answer_count})")
             print(
                 f"no_answer recall:  1.00 ({self.no_answer_count}/{self.no_answer_count}) (no_answer samples are always treated as correctly retrieved)")
-        print(f"recall: {self.recall} ({self.correct_retrieval_count} / {self.query_count})")
+        print(f"recall: {self.recall:.4f} ({self.correct_retrieval_count} / {self.query_count})")
 
 
 class EvalReader:
@@ -199,14 +199,14 @@ def print(self, mode):
             print("-----------------")
             # print(f"answer in retrieved docs: {correct_retrieval}")
             print(f"has answer queries: {self.has_answer_count}")
-            print(f"top 1 EM: {self.top_1_em}")
-            print(f"top k EM: {self.top_k_em}")
-            print(f"top 1 F1: {self.top_1_f1}")
-            print(f"top k F1: {self.top_k_f1}")
+            print(f"top 1 EM: {self.top_1_em:.4f}")
+            print(f"top k EM: {self.top_k_em:.4f}")
+            print(f"top 1 F1: {self.top_1_f1:.4f}")
+            print(f"top k F1: {self.top_k_f1:.4f}")
             if self.no_answer_count:
                 print()
                 print(f"no_answer queries: {self.no_answer_count}")
-                print(f"top 1 no_answer accuracy: {self.top_1_no_answer}")
+                print(f"top 1 no_answer accuracy: {self.top_1_no_answer:.4f}")
         elif mode == "pipeline":
             print("Pipeline")
             print("-----------------")
@@ -217,10 +217,10 @@ def print(self, mode):
             pipeline_top_k_f1 = (self.top_k_f1_sum + self.no_answer_count) / self.query_count
 
             print(f"queries: {self.query_count}")
-            print(f"top 1 EM: {pipeline_top_1_em}")
-            print(f"top k EM: {pipeline_top_k_em}")
-            print(f"top 1 F1: {pipeline_top_1_f1}")
-            print(f"top k F1: {pipeline_top_k_f1}")
+            print(f"top 1 EM: {pipeline_top_1_em:.4f}")
+            print(f"top k EM: {pipeline_top_k_em:.4f}")
+            print(f"top 1 F1: {pipeline_top_1_f1:.4f}")
+            print(f"top k F1: {pipeline_top_k_f1:.4f}")
             if self.no_answer_count:
                 print(
                     "(top k results are likely inflated since the Reader always returns a no_answer prediction in its top k)"
```
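Since only the rendered string is rounded, the full-precision values remain available on the evaluation nodes, as the commit message notes. A hypothetical usage sketch (the `eval_retriever` instance name is illustrative and assumes the node has already been run through a pipeline; the attribute names come from the diff above):

```python
# Rounded, human-readable summary:
eval_retriever.print()
# recall: 0.9162 (328 / 358)

# Full-precision values via the class attributes:
print(eval_retriever.recall)             # 0.9162011173184358
print(eval_retriever.has_answer_recall)  # 0.8739495798319328
```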
