Skip to content

Commit

Permalink
refactor test artifacts for increased readability
Browse files Browse the repository at this point in the history
  • Loading branch information
conitrade-as committed Apr 26, 2021
1 parent dd42edd commit 2b5cc0d
Show file tree
Hide file tree
Showing 37 changed files with 580 additions and 60 deletions.
89 changes: 29 additions & 60 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import json

from karton.core import Task
from karton.core.test import KartonTestCase, TestResource
Expand All @@ -9,69 +10,37 @@
class ClassifierTestCase(KartonTestCase):
karton_class = Classifier

def _single_file_test(
self, name, content, tag,
kind=None, platform=None, extension=None, stage="recognized"
):
sample = TestResource(name, content)
task = Task({
"type": "sample",
"kind": "raw"
}, payload={
"sample": sample,
"extraction_level": 999
})
results = self.run_task(task)
if kind is None:
self.assertTasksEqual(results, [])
else:
expected_headers = {
"origin": "karton.classifier",
"type": "sample",
"stage": stage,
"quality": "high",
"kind": kind
}
if platform:
expected_headers["platform"] = platform
if extension:
expected_headers["extension"] = extension

payload = {
"sample": sample,
"extraction_level": 999,
}
if tag:
payload["tags"] = [tag]

self.assertTasksEqual(results, [Task(expected_headers, payload)])

def test_works_as_expected(self):
self.maxDiff = None
for testcase in os.listdir("tests/testdata"):
print(testcase)
if not testcase.endswith('.json'):
continue

testcase_config = testcase
testcase_content = testcase_config.replace('.json', '')

with self.subTest(testcase):
with open(f"tests/testdata/{testcase}", "rb") as f:
with open(f"tests/testdata/{testcase_config}", "rb") as f:
expected = json.load(f)
with open(f"tests/testdata/{testcase_content}", "rb") as f:
content = f.read()
expected_tag, file_name = testcase.split("-", 1)
tag_elements = expected_tag.split(":")

# "misc" prefix is added only for mwdb compatibility
if tag_elements[0] == "misc":
tag_elements = tag_elements[1:]

headers = {
"kind": tag_elements[0],
'stage': "recognized",
sample = TestResource(testcase_content, content)
task = Task({
"type": "sample",
"kind": "raw"
}, payload={
"sample": sample,
"extraction_level": 999,
})

payload = {
"sample": sample,
"extraction_level": 999,
}
if len(tag_elements) > 1:
if tag_elements[0] == "archive":
headers["extension"] = tag_elements[1]
elif tag_elements[0] == "unknown":
expected_tag = None
headers['stage'] = "unrecognized"
else:
headers["platform"] = tag_elements[1]
if len(tag_elements) > 2:
headers["extension"] = tag_elements[2]
self._single_file_test(
file_name, content, expected_tag, **headers
)
if expected.get("payload"):
payload.update(expected["payload"])

res = self.run_task(task)
self.assertTasksEqual(res, [Task(expected["headers"], payload)])
15 changes: 15 additions & 0 deletions tests/testdata/archive:7z-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "7z",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:7z"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:ace-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "ace",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:ace"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:bz2-sample.bz2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "bz2",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:bz2"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:cab-sample.cab.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "cab",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:cab"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:cab-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "cab",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:cab"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:gz-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "gz",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:gz"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:iso-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "iso",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:iso"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:lz-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "lz",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:lz"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:rar-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "rar",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:rar"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:tar-sample.tar.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "tar",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:tar"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:udf-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "udf",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:udf"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:xz-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "xz",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:xz"
]
}
}
15 changes: 15 additions & 0 deletions tests/testdata/archive:zip-sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"headers": {
"extension": "zip",
"kind": "archive",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample"
},
"payload": {
"tags": [
"archive:zip"
]
}
}
16 changes: 16 additions & 0 deletions tests/testdata/document:win32:doc-12a_a.doc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"headers": {
"extension": "doc",
"kind": "document",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample",
"platform": "win32"
},
"payload": {
"tags": [
"document:win32:doc"
]
}
}
16 changes: 16 additions & 0 deletions tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"headers": {
"extension": "docx",
"kind": "document",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample",
"platform": "win32"
},
"payload": {
"tags": [
"document:win32:docx"
]
}
}
16 changes: 16 additions & 0 deletions tests/testdata/document:win32:pdf-Doc-6235300-202005.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"headers": {
"extension": "pdf",
"kind": "document",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample",
"platform": "win32"
},
"payload": {
"tags": [
"document:win32:pdf"
]
}
}
16 changes: 16 additions & 0 deletions tests/testdata/document:win32:rtf-document.doc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"headers": {
"extension": "rtf",
"kind": "document",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample",
"platform": "win32"
},
"payload": {
"tags": [
"document:win32:rtf"
]
}
}
16 changes: 16 additions & 0 deletions tests/testdata/document:win32:xls-faktura_82.xls.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"headers": {
"extension": "xls",
"kind": "document",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
"type": "sample",
"platform": "win32"
},
"payload": {
"tags": [
"document:win32:xls"
]
}
}
Loading

0 comments on commit 2b5cc0d

Please sign in to comment.