diff --git a/tests/__init__.py b/tests/__init__.py index b29fc03..ca515ce 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,5 @@ import os +import json from karton.core import Task from karton.core.test import KartonTestCase, TestResource @@ -9,69 +10,37 @@ class ClassifierTestCase(KartonTestCase): karton_class = Classifier - def _single_file_test( - self, name, content, tag, - kind=None, platform=None, extension=None, stage="recognized" - ): - sample = TestResource(name, content) - task = Task({ - "type": "sample", - "kind": "raw" - }, payload={ - "sample": sample, - "extraction_level": 999 - }) - results = self.run_task(task) - if kind is None: - self.assertTasksEqual(results, []) - else: - expected_headers = { - "origin": "karton.classifier", - "type": "sample", - "stage": stage, - "quality": "high", - "kind": kind - } - if platform: - expected_headers["platform"] = platform - if extension: - expected_headers["extension"] = extension - - payload = { - "sample": sample, - "extraction_level": 999, - } - if tag: - payload["tags"] = [tag] - - self.assertTasksEqual(results, [Task(expected_headers, payload)]) - def test_works_as_expected(self): + self.maxDiff = None for testcase in os.listdir("tests/testdata"): + print(testcase) + if not testcase.endswith('.json'): + continue + + testcase_config = testcase + testcase_content = testcase_config.replace('.json', '') + with self.subTest(testcase): - with open(f"tests/testdata/{testcase}", "rb") as f: + with open(f"tests/testdata/{testcase_config}", "rb") as f: + expected = json.load(f) + with open(f"tests/testdata/{testcase_content}", "rb") as f: content = f.read() - expected_tag, file_name = testcase.split("-", 1) - tag_elements = expected_tag.split(":") - - # "misc" prefix is added only for mwdb compatibility - if tag_elements[0] == "misc": - tag_elements = tag_elements[1:] - headers = { - "kind": tag_elements[0], - 'stage': "recognized", + sample = TestResource(testcase_content, content) + task = Task({ + "type": "sample", + "kind": "raw" + }, payload={ + "sample": sample, + "extraction_level": 999, + }) + + payload = { + "sample": sample, + "extraction_level": 999, } - if len(tag_elements) > 1: - if tag_elements[0] == "archive": - headers["extension"] = tag_elements[1] - elif tag_elements[0] == "unknown": - expected_tag = None - headers['stage'] = "unrecognized" - else: - headers["platform"] = tag_elements[1] - if len(tag_elements) > 2: - headers["extension"] = tag_elements[2] - self._single_file_test( - file_name, content, expected_tag, **headers - ) + if expected.get("payload"): + payload.update(expected["payload"]) + + res = self.run_task(task) + self.assertTasksEqual(res, [Task(expected["headers"], payload)]) diff --git a/tests/testdata/archive:7z-sample.json b/tests/testdata/archive:7z-sample.json new file mode 100644 index 0000000..ad28b23 --- /dev/null +++ b/tests/testdata/archive:7z-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "7z", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:7z" + ] + } +} diff --git a/tests/testdata/archive:ace-sample.json b/tests/testdata/archive:ace-sample.json new file mode 100644 index 0000000..00d1e69 --- /dev/null +++ b/tests/testdata/archive:ace-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "ace", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:ace" + ] + } +} diff --git a/tests/testdata/archive:bz2-sample.bz2.json b/tests/testdata/archive:bz2-sample.bz2.json new file mode 100644 index 0000000..ee58f26 --- /dev/null +++ b/tests/testdata/archive:bz2-sample.bz2.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "bz2", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:bz2" + ] + } +} diff --git a/tests/testdata/archive:cab-sample.cab.json b/tests/testdata/archive:cab-sample.cab.json new file mode 100644 index 0000000..568b0f5 --- /dev/null +++ b/tests/testdata/archive:cab-sample.cab.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "cab", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:cab" + ] + } +} diff --git a/tests/testdata/archive:cab-sample.json b/tests/testdata/archive:cab-sample.json new file mode 100644 index 0000000..568b0f5 --- /dev/null +++ b/tests/testdata/archive:cab-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "cab", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:cab" + ] + } +} diff --git a/tests/testdata/archive:gz-sample.json b/tests/testdata/archive:gz-sample.json new file mode 100644 index 0000000..f886f78 --- /dev/null +++ b/tests/testdata/archive:gz-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "gz", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:gz" + ] + } +} diff --git a/tests/testdata/archive:iso-sample.json b/tests/testdata/archive:iso-sample.json new file mode 100644 index 0000000..ba85b76 --- /dev/null +++ b/tests/testdata/archive:iso-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "iso", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:iso" + ] + } +} diff --git a/tests/testdata/archive:lz-sample.json b/tests/testdata/archive:lz-sample.json new file mode 100644 index 0000000..140fdca --- /dev/null +++ b/tests/testdata/archive:lz-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "lz", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:lz" + ] + } +} diff --git a/tests/testdata/archive:rar-sample.json b/tests/testdata/archive:rar-sample.json new file mode 100644 index 0000000..b1303b7 --- /dev/null +++ b/tests/testdata/archive:rar-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "rar", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:rar" + ] + } +} diff --git a/tests/testdata/archive:tar-sample.tar.json b/tests/testdata/archive:tar-sample.tar.json new file mode 100644 index 0000000..47103a8 --- /dev/null +++ b/tests/testdata/archive:tar-sample.tar.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "tar", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:tar" + ] + } +} diff --git a/tests/testdata/archive:udf-sample.json b/tests/testdata/archive:udf-sample.json new file mode 100644 index 0000000..8a4dc68 --- /dev/null +++ b/tests/testdata/archive:udf-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "udf", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:udf" + ] + } +} diff --git a/tests/testdata/archive:xz-sample.json b/tests/testdata/archive:xz-sample.json new file mode 100644 index 0000000..e80da7b --- /dev/null +++ b/tests/testdata/archive:xz-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "xz", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:xz" + ] + } +} diff --git a/tests/testdata/archive:zip-sample.json b/tests/testdata/archive:zip-sample.json new file mode 100644 index 0000000..72151b5 --- /dev/null +++ b/tests/testdata/archive:zip-sample.json @@ -0,0 +1,15 @@ +{ + "headers": { + "extension": "zip", + "kind": "archive", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "archive:zip" + ] + } +} diff --git a/tests/testdata/document:win32:doc-12a_a.doc.json b/tests/testdata/document:win32:doc-12a_a.doc.json new file mode 100644 index 0000000..50a6d61 --- /dev/null +++ b/tests/testdata/document:win32:doc-12a_a.doc.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "doc", + "kind": "document", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "document:win32:doc" + ] + } +} diff --git a/tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json b/tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json new file mode 100644 index 0000000..db6a582 --- /dev/null +++ b/tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "docx", + "kind": "document", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "document:win32:docx" + ] + } +} diff --git a/tests/testdata/document:win32:pdf-Doc-6235300-202005.json b/tests/testdata/document:win32:pdf-Doc-6235300-202005.json new file mode 100644 index 0000000..4ed5bd3 --- /dev/null +++ b/tests/testdata/document:win32:pdf-Doc-6235300-202005.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "pdf", + "kind": "document", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "document:win32:pdf" + ] + } +} diff --git a/tests/testdata/document:win32:rtf-document.doc.json b/tests/testdata/document:win32:rtf-document.doc.json new file mode 100644 index 0000000..399a279 --- /dev/null +++ b/tests/testdata/document:win32:rtf-document.doc.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "rtf", + "kind": "document", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "document:win32:rtf" + ] + } +} diff --git a/tests/testdata/document:win32:xls-faktura_82.xls.json b/tests/testdata/document:win32:xls-faktura_82.xls.json new file mode 100644 index 0000000..967f90d --- /dev/null +++ b/tests/testdata/document:win32:xls-faktura_82.xls.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "xls", + "kind": "document", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "document:win32:xls" + ] + } +} diff --git a/tests/testdata/document:win32:xlsx-order_764029.xlsm.json b/tests/testdata/document:win32:xlsx-order_764029.xlsm.json new file mode 100644 index 0000000..bd05ff8 --- /dev/null +++ b/tests/testdata/document:win32:xlsx-order_764029.xlsm.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "xlsx", + "kind": "document", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "document:win32:xlsx" + ] + } +} diff --git a/tests/testdata/misc:ascii-D7niSVLx.json b/tests/testdata/misc:ascii-D7niSVLx.json new file mode 100644 index 0000000..ad2f0d3 --- /dev/null +++ b/tests/testdata/misc:ascii-D7niSVLx.json @@ -0,0 +1,14 @@ +{ + "headers": { + "kind": "ascii", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "misc:ascii" + ] + } +} diff --git a/tests/testdata/misc:html-faktura_120207.json b/tests/testdata/misc:html-faktura_120207.json new file mode 100644 index 0000000..6660bba --- /dev/null +++ b/tests/testdata/misc:html-faktura_120207.json @@ -0,0 +1,14 @@ +{ + "headers": { + "kind": "html", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample" + }, + "payload": { + "tags": [ + "misc:html" + ] + } +} diff --git a/tests/testdata/runnable:android:dex-sample.json b/tests/testdata/runnable:android:dex-sample.json new file mode 100644 index 0000000..184d4c4 --- /dev/null +++ b/tests/testdata/runnable:android:dex-sample.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "dex", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "android" + }, + "payload": { + "tags": [ + "runnable:android:dex" + ] + } +} diff --git a/tests/testdata/runnable:linux-bomba.spc.json b/tests/testdata/runnable:linux-bomba.spc.json new file mode 100644 index 0000000..9ebe535 --- /dev/null +++ b/tests/testdata/runnable:linux-bomba.spc.json @@ -0,0 +1,15 @@ +{ + "headers": { + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "linux" + }, + "payload": { + "tags": [ + "runnable:linux" + ] + } +} diff --git a/tests/testdata/runnable:win32:dll-740000_5e637343801dcee0.json b/tests/testdata/runnable:win32:dll-740000_5e637343801dcee0.json new file mode 100644 index 0000000..c23f877 --- /dev/null +++ b/tests/testdata/runnable:win32:dll-740000_5e637343801dcee0.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "dll", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "runnable:win32:dll" + ] + } +} diff --git a/tests/testdata/runnable:win32:exe-400000_6179792a2973254f.json b/tests/testdata/runnable:win32:exe-400000_6179792a2973254f.json new file mode 100644 index 0000000..808a18c --- /dev/null +++ b/tests/testdata/runnable:win32:exe-400000_6179792a2973254f.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "exe", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "runnable:win32:exe" + ] + } +} diff --git a/tests/testdata/runnable:win32:jar-None.json b/tests/testdata/runnable:win32:jar-None.json new file mode 100644 index 0000000..e5e5b5a --- /dev/null +++ b/tests/testdata/runnable:win32:jar-None.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "jar", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "runnable:win32:jar" + ] + } +} diff --git a/tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json b/tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json new file mode 100644 index 0000000..c2fa626 --- /dev/null +++ b/tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "lnk", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "runnable:win32:lnk" + ] + } +} diff --git a/tests/testdata/runnable:win32:msi-installer.json b/tests/testdata/runnable:win32:msi-installer.json new file mode 100644 index 0000000..c99b7c4 --- /dev/null +++ b/tests/testdata/runnable:win32:msi-installer.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "msi", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "runnable:win32:msi" + ] + } +} diff --git a/tests/testdata/runnable:win32:swf-login.json b/tests/testdata/runnable:win32:swf-login.json new file mode 100644 index 0000000..6c04d00 --- /dev/null +++ b/tests/testdata/runnable:win32:swf-login.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "swf", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "runnable:win32:swf" + ] + } +} diff --git a/tests/testdata/runnable:win64:dll-isfb_x64.json b/tests/testdata/runnable:win64:dll-isfb_x64.json new file mode 100644 index 0000000..bf86f2c --- /dev/null +++ b/tests/testdata/runnable:win64:dll-isfb_x64.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "dll", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win64" + }, + "payload": { + "tags": [ + "runnable:win64:dll" + ] + } +} diff --git a/tests/testdata/runnable:win64:exe-cpu64.dll.json b/tests/testdata/runnable:win64:exe-cpu64.dll.json new file mode 100644 index 0000000..ea54602 --- /dev/null +++ b/tests/testdata/runnable:win64:exe-cpu64.dll.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "exe", + "kind": "runnable", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win64" + }, + "payload": { + "tags": [ + "runnable:win64:exe" + ] + } +} diff --git a/tests/testdata/script:win32:js-dropper.json b/tests/testdata/script:win32:js-dropper.json new file mode 100644 index 0000000..9c54ec6 --- /dev/null +++ b/tests/testdata/script:win32:js-dropper.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "js", + "kind": "script", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "script:win32:js" + ] + } +} diff --git a/tests/testdata/script:win32:jse-js_script.json b/tests/testdata/script:win32:jse-js_script.json new file mode 100644 index 0000000..a530ac7 --- /dev/null +++ b/tests/testdata/script:win32:jse-js_script.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "jse", + "kind": "script", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "script:win32:jse" + ] + } +} diff --git a/tests/testdata/script:win32:ps1-reminettance.ps1.json b/tests/testdata/script:win32:ps1-reminettance.ps1.json new file mode 100644 index 0000000..e62b4f5 --- /dev/null +++ b/tests/testdata/script:win32:ps1-reminettance.ps1.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "ps1", + "kind": "script", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "script:win32:ps1" + ] + } +} diff --git a/tests/testdata/script:win32:vbs-wn4XZ9Xs.json b/tests/testdata/script:win32:vbs-wn4XZ9Xs.json new file mode 100644 index 0000000..3a8fa8b --- /dev/null +++ b/tests/testdata/script:win32:vbs-wn4XZ9Xs.json @@ -0,0 +1,16 @@ +{ + "headers": { + "extension": "vbs", + "kind": "script", + "origin": "karton.classifier", + "quality": "high", + "stage": "recognized", + "type": "sample", + "platform": "win32" + }, + "payload": { + "tags": [ + "script:win32:vbs" + ] + } +} diff --git a/tests/testdata/unknown:unknown-sample.json b/tests/testdata/unknown:unknown-sample.json new file mode 100644 index 0000000..237c428 --- /dev/null +++ b/tests/testdata/unknown:unknown-sample.json @@ -0,0 +1,9 @@ +{ + "headers": { + "kind": "unknown", + "origin": "karton.classifier", + "quality": "high", + "stage": "unrecognized", + "type": "sample" + } +}