Skip to content

Commit

Permalink
rewrite eval schema and sample according to OCR-D/zenhub#123
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Sep 21, 2022
1 parent 0cc4a0b commit 5aa6bd5
Show file tree
Hide file tree
Showing 6 changed files with 902 additions and 128 deletions.
14 changes: 7 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
json: \
bagit-profile.json \
ocrd_tool.schema.json \
openapi.json

deps:
pip install yaml click
json: $(shell find -name '*.json')

%.json: %.yml
python3 scripts/yaml-to-json.py $< $@

validate: json
jsonschema --output pretty --validator Draft201909Validator --instance ocrd_eval.sample.json ocrd_eval.schema.json

deps:
pip install yaml click jsonschema
120 changes: 120 additions & 0 deletions ocrd_eval.sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
[
{
"@id": "https://github.com/OCR-D/quiver/tree/data/evaluations/wf1-data345-eval1.json",
"label": "OCR workflow 1 on workspace 345",
"metadata": {
"ocr_workflow": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/1.nf",
"label": "OCR Workflow 1"
},
"eval_workflow": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf",
"label": "Evaluation Workflow 1"
},
"gt_workspace": {
"@id": "https://gt.ocr-d.de/workspace/789",
"label": "GT workspace 789 (19th century fraktur)"
},
"ocr_workspace": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip",
"label": "OCR result workspace 3000"
},
"eval_workspace": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip",
"label": "Evaluation Workspace 345"
},
"workflow_steps": {
"0": "Processor A",
"1": "Processor B"
},
"workflow_model": "Fraktur_GT4HistOCR",
"document_metadata": {
"fonts": [
"antiqua",
"fraktur"
],
"publication_century": "1800-1900",
"publication_decade": "1850-1860",
"publication_year": 1855,
"number_of_pages": 100,
"layout": "simple"
}
},
"evaluation": {
"document_wide": {
"wall_time": 1234,
"cer": 0.57,
"cer_min_max": [
0.2,
0.57
]
},
"by_page": [
{
"page_id": "PHYS_0001",
"cer": 0.8,
"processing_time": 2.1
}
]
}
},
{
"@id": "https://github.com/OCR-D/quiver/tree/data/evaluations/wf2-data345-eval1.json",
"label": "OCR Workflow 2 on Data 345",
"metadata": {
"ocr_workflow": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/2.nf",
"label": "OCR Workflow 2"
},
"eval_workflow": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf",
"label": "Evaluation Workflow 1"
},
"gt_workspace": {
"@id": "https://gt.ocr-d.de/workspace/789",
"label": "GT workspace 789 (19th century fraktur)"
},
"ocr_workspace": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip",
"label": "OCR result workspace 3000"
},
"eval_workspace": {
"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip",
"label": "Evaluation Workspace 345"
},
"workflow_steps": {
"0": "Processor A",
"1": "Processor B"
},
"workflow_model": "Fraktur_GT4HistOCR",
"document_metadata": {
"fonts": [
"antiqua",
"fraktur"
],
"publication_century": "1800-1900",
"publication_decade": "1850-1860",
"publication_year": 1855,
"number_of_pages": 100,
"layout": "simple"
}
},
"evaluation": {
"document_wide": {
"wall_time": 4567,
"cer": 0.9,
"cer_min_max": [
0.2,
0.99
]
},
"by_page": [
{
"page_id": "PHYS_0001",
"cer": 0.9,
"processing_time": 2.1
}
]
}
}
]
109 changes: 78 additions & 31 deletions ocrd_eval.sample.yml
Original file line number Diff line number Diff line change
@@ -1,38 +1,85 @@
wf1-data345-eval1:
label: Workflow 1 on Data 345
- '@id': https://github.com/OCR-D/quiver/tree/data/evaluations/wf1-data345-eval1.json
label: OCR workflow 1 on workspace 345
metadata:
workflow: https://example.org/workflow/1
eval_workflow: https://example.org/workflow/eval1
eval_data: https://example.org/workspace/345
gt_data: https://gt.ocr-d.de/workspace/789
document:
publication_year: 1789
number_of_pages: 10
evaluations:
ocr_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/1.nf
label: OCR Workflow 1
eval_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf
label: Evaluation Workflow 1
gt_workspace:
'@id': https://gt.ocr-d.de/workspace/789
label: GT workspace 789 (19th century fraktur)
ocr_workspace:
'@id': https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip
label: OCR result workspace 3000
eval_workspace:
'@id': https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip
label: Evaluation Workspace 345
workflow_steps:
'0': Processor A
'1': Processor B
workflow_model: Fraktur_GT4HistOCR
document_metadata:
fonts:
- antiqua
- fraktur
publication_century: 1800-1900
publication_decade: 1850-1860
publication_year: 1855
number_of_pages: 100
layout: simple
evaluation:
document_wide:
wall_time: 1234
cer: 0.57
cer_document_mean: 0.38
cer_document_median: 0.52
cer_document_standard_deviation: 0.12
cer_min_max:
- 0.2
- 0.57
by_page:
PHYS_0001:
cer: 0.8
wf2-data345-eval1:
label: Workflow 2 on Data 345
- page_id: PHYS_0001
cer: 0.8
processing_time: 2.1

- '@id': https://github.com/OCR-D/quiver/tree/data/evaluations/wf2-data345-eval1.json
label: OCR Workflow 2 on Data 345
metadata:
workflow: https://example.org/workflow/2
eval_workflow: https://example.org/workflow/eval1
eval_data: https://example.org/workspace/345
gt_data: https://gt.ocr-d.de/workspace/789
document:
publication_year: 1789
number_of_pages: 10
evaluations:
ocr_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/2.nf
label: OCR Workflow 2
eval_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf
label: Evaluation Workflow 1
gt_workspace:
'@id': https://gt.ocr-d.de/workspace/789
label: GT workspace 789 (19th century fraktur)
ocr_workspace:
'@id': https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip
label: OCR result workspace 3000
eval_workspace:
'@id': https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip
label: Evaluation Workspace 345
workflow_steps:
'0': Processor A
'1': Processor B
workflow_model: Fraktur_GT4HistOCR
document_metadata:
fonts:
- antiqua
- fraktur
publication_century: 1800-1900
publication_decade: 1850-1860
publication_year: 1855
number_of_pages: 100
layout: simple
evaluation:
document_wide:
cer: 0.88
cer_document_mean: 0.77
cer_document_median: 0.66
cer_document_standard_deviation: 0.55
wall_time: 4567
cer: 0.9
cer_min_max:
- 0.2
- 0.99
by_page:
PHYS_0001:
cer: 0.9
- page_id: PHYS_0001
cer: 0.9
processing_time: 2.1
Loading

0 comments on commit 5aa6bd5

Please sign in to comment.