From 0e9a351d24e15d9a9324478aaa8c4bd184be1579 Mon Sep 17 00:00:00 2001 From: Alexander Sennhauser Date: Mon, 26 Apr 2021 20:26:36 +0200 Subject: [PATCH] return magic description and mime type --- karton/classifier/classifier.py | 4 ++++ tests/testdata/archive:7z-sample.json | 2 ++ tests/testdata/archive:ace-sample.json | 2 ++ tests/testdata/archive:bz2-sample.bz2.json | 2 ++ tests/testdata/archive:cab-sample.cab.json | 2 ++ tests/testdata/archive:cab-sample.json | 2 ++ tests/testdata/archive:gz-sample.json | 2 ++ tests/testdata/archive:iso-sample.json | 2 ++ tests/testdata/archive:lz-sample.json | 2 ++ tests/testdata/archive:rar-sample.json | 2 ++ tests/testdata/archive:tar-sample.tar.json | 2 ++ tests/testdata/archive:udf-sample.json | 2 ++ tests/testdata/archive:xz-sample.json | 2 ++ tests/testdata/archive:zip-sample.json | 2 ++ tests/testdata/document:win32:doc-12a_a.doc.json | 2 ++ tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json | 2 ++ tests/testdata/document:win32:pdf-Doc-6235300-202005.json | 2 ++ tests/testdata/document:win32:rtf-document.doc.json | 2 ++ tests/testdata/document:win32:xls-faktura_82.xls.json | 2 ++ tests/testdata/document:win32:xlsx-order_764029.xlsm.json | 2 ++ tests/testdata/misc:ascii-D7niSVLx.json | 2 ++ tests/testdata/misc:html-faktura_120207.json | 2 ++ tests/testdata/runnable:android:dex-sample.json | 2 ++ tests/testdata/runnable:linux-bomba.spc.json | 2 ++ .../testdata/runnable:win32:dll-740000_5e637343801dcee0.json | 2 ++ .../testdata/runnable:win32:exe-400000_6179792a2973254f.json | 2 ++ tests/testdata/runnable:win32:jar-None.json | 2 ++ tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json | 2 ++ tests/testdata/runnable:win32:msi-installer.json | 2 ++ tests/testdata/runnable:win32:swf-login.json | 2 ++ tests/testdata/runnable:win64:dll-isfb_x64.json | 2 ++ tests/testdata/runnable:win64:exe-cpu64.dll.json | 2 ++ tests/testdata/script:win32:js-dropper.json | 2 ++ tests/testdata/script:win32:jse-js_script.json | 2 ++ tests/testdata/script:win32:ps1-reminettance.ps1.json | 2 ++ tests/testdata/script:win32:vbs-wn4XZ9Xs.json | 2 ++ 36 files changed, 74 insertions(+) diff --git a/karton/classifier/classifier.py b/karton/classifier/classifier.py index 3128ccb..7727f36 100644 --- a/karton/classifier/classifier.py +++ b/karton/classifier/classifier.py @@ -116,8 +116,10 @@ def _classify(self, task: Task) -> Optional[Dict[str, str]]: content = cast(bytes, sample.content) magic = task.get_payload("magic") or "" + magic_mime = task.get_payload("mime") or "" try: magic = pymagic.from_buffer(content) + magic_mime = pymagic.from_buffer(content, mime=True) except Exception as ex: self.log.warning(f"unable to get magic: {ex}") @@ -126,6 +128,8 @@ def _classify(self, task: Task) -> Optional[Dict[str, str]]: "type": "sample", "stage": "recognized", "quality": task.headers.get("quality", "high"), + "magic": magic if magic else None, + "mime": magic_mime if magic_mime else None, } # Is PE file? diff --git a/tests/testdata/archive:7z-sample.json b/tests/testdata/archive:7z-sample.json index ad28b23..dbae9a6 100644 --- a/tests/testdata/archive:7z-sample.json +++ b/tests/testdata/archive:7z-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "7z", "kind": "archive", + "magic": "7-zip archive data, version 0.4", + "mime": "application/x-7z-compressed", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:ace-sample.json b/tests/testdata/archive:ace-sample.json index 00d1e69..4a7032c 100644 --- a/tests/testdata/archive:ace-sample.json +++ b/tests/testdata/archive:ace-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "ace", "kind": "archive", + "magic": "ACE archive data version 20, from Win/32, version 20 to extract, contains AV-String (unregistered), solid", + "mime": "application/octet-stream", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:bz2-sample.bz2.json b/tests/testdata/archive:bz2-sample.bz2.json index ee58f26..550ff22 100644 --- a/tests/testdata/archive:bz2-sample.bz2.json +++ b/tests/testdata/archive:bz2-sample.bz2.json @@ -2,6 +2,8 @@ "headers": { "extension": "bz2", "kind": "archive", + "magic": "bzip2 compressed data, block size = 900k", + "mime": "application/x-bzip2", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:cab-sample.cab.json b/tests/testdata/archive:cab-sample.cab.json index 568b0f5..347034f 100644 --- a/tests/testdata/archive:cab-sample.cab.json +++ b/tests/testdata/archive:cab-sample.cab.json @@ -2,6 +2,8 @@ "headers": { "extension": "cab", "kind": "archive", + "magic": "Microsoft Cabinet archive data, Windows 2000/XP setup, 235156 bytes, 1 file, at 0x2c +A \"RFQ and Company Profile_PDF.exe\", number 1, 12 datablocks, 0x1503 compression", + "mime": "application/vnd.ms-cab-compressed", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:cab-sample.json b/tests/testdata/archive:cab-sample.json index 568b0f5..ff69cca 100644 --- a/tests/testdata/archive:cab-sample.json +++ b/tests/testdata/archive:cab-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "cab", "kind": "archive", + "magic": "Microsoft Cabinet archive data, Windows 2000/XP setup, 5099 bytes, 1 file, at 0x2c +A \"360se.ini\", number 1, 1 datablock, 0x1 compression", + "mime": "application/vnd.ms-cab-compressed", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:gz-sample.json b/tests/testdata/archive:gz-sample.json index f886f78..3421ef5 100644 --- a/tests/testdata/archive:gz-sample.json +++ b/tests/testdata/archive:gz-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "gz", "kind": "archive", + "magic": "gzip compressed data, was \"Order 002_PDF.exe\", last modified: Thu Apr 30 23:25:26 2020, from FAT filesystem (MS-DOS, OS/2, NT)", + "mime": "application/gzip", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:iso-sample.json b/tests/testdata/archive:iso-sample.json index ba85b76..e04b8c3 100644 --- a/tests/testdata/archive:iso-sample.json +++ b/tests/testdata/archive:iso-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "iso", "kind": "archive", + "magic": "ISO 9660 CD-ROM filesystem data 'DHL Shipping Document (Please Si'", + "mime": "application/x-iso9660-image", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:lz-sample.json b/tests/testdata/archive:lz-sample.json index 140fdca..ff19519 100644 --- a/tests/testdata/archive:lz-sample.json +++ b/tests/testdata/archive:lz-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "lz", "kind": "archive", + "magic": "lzip compressed data, version: 1", + "mime": "application/x-lzip", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:rar-sample.json b/tests/testdata/archive:rar-sample.json index b1303b7..bd6f9fa 100644 --- a/tests/testdata/archive:rar-sample.json +++ b/tests/testdata/archive:rar-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "rar", "kind": "archive", + "magic": "RAR archive data, v5", + "mime": "application/x-rar", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:tar-sample.tar.json b/tests/testdata/archive:tar-sample.tar.json index 47103a8..c4bf493 100644 --- a/tests/testdata/archive:tar-sample.tar.json +++ b/tests/testdata/archive:tar-sample.tar.json @@ -2,6 +2,8 @@ "headers": { "extension": "tar", "kind": "archive", + "magic": "POSIX tar archive", + "mime": "application/x-tar", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:udf-sample.json b/tests/testdata/archive:udf-sample.json index 8a4dc68..da1a660 100644 --- a/tests/testdata/archive:udf-sample.json +++ b/tests/testdata/archive:udf-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "udf", "kind": "archive", + "magic": "UDF filesystem data (version 1.5) '06_25_2020'", + "mime": "application/x-iso9660-image", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:xz-sample.json b/tests/testdata/archive:xz-sample.json index e80da7b..4b088ee 100644 --- a/tests/testdata/archive:xz-sample.json +++ b/tests/testdata/archive:xz-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "xz", "kind": "archive", + "magic": "XZ compressed data", + "mime": "application/x-xz", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/archive:zip-sample.json b/tests/testdata/archive:zip-sample.json index 72151b5..1b524f2 100644 --- a/tests/testdata/archive:zip-sample.json +++ b/tests/testdata/archive:zip-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "zip", "kind": "archive", + "magic": "Zip archive data, at least v2.0 to extract", + "mime": "application/zip", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/document:win32:doc-12a_a.doc.json b/tests/testdata/document:win32:doc-12a_a.doc.json index 50a6d61..c1390a6 100644 --- a/tests/testdata/document:win32:doc-12a_a.doc.json +++ b/tests/testdata/document:win32:doc-12a_a.doc.json @@ -2,6 +2,8 @@ "headers": { "extension": "doc", "kind": "document", + "magic": "Composite Document File V2 Document, Little Endian, Os: Windows, Version 6.1, Code page: 1251, Title: , Template: Normal, Last Saved By: Z, Revision Number: 5, Name of Creating Application: Microsoft Office Word, Total Editing Time: 01:33:00, Last Printed: Sat Nov 5 19:25:00 2016, Create Time/Date: Sun Oct 30 16:29:00 2016, Last Saved Time/Date: Sat Nov 5 19:28:00 2016, Number of Pages: 9, Number of Words: 1800, Number of Characters: 10264, Security: 0", + "mime": "application/msword", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json b/tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json index db6a582..a1895bb 100644 --- a/tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json +++ b/tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json @@ -2,6 +2,8 @@ "headers": { "extension": "docx", "kind": "document", + "magic": "Microsoft Word 2007+", + "mime": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/document:win32:pdf-Doc-6235300-202005.json b/tests/testdata/document:win32:pdf-Doc-6235300-202005.json index 4ed5bd3..890c934 100644 --- a/tests/testdata/document:win32:pdf-Doc-6235300-202005.json +++ b/tests/testdata/document:win32:pdf-Doc-6235300-202005.json @@ -2,6 +2,8 @@ "headers": { "extension": "pdf", "kind": "document", + "magic": "PDF document, version 1.4", + "mime": "application/pdf", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/document:win32:rtf-document.doc.json b/tests/testdata/document:win32:rtf-document.doc.json index 399a279..61266fd 100644 --- a/tests/testdata/document:win32:rtf-document.doc.json +++ b/tests/testdata/document:win32:rtf-document.doc.json @@ -2,6 +2,8 @@ "headers": { "extension": "rtf", "kind": "document", + "magic": "Rich Text Format data, version 1, unknown character set", + "mime": "text/rtf", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/document:win32:xls-faktura_82.xls.json b/tests/testdata/document:win32:xls-faktura_82.xls.json index 967f90d..b29a4c0 100644 --- a/tests/testdata/document:win32:xls-faktura_82.xls.json +++ b/tests/testdata/document:win32:xls-faktura_82.xls.json @@ -2,6 +2,8 @@ "headers": { "extension": "xls", "kind": "document", + "magic": "Composite Document File V2 Document, Little Endian, Os: Windows, Version 6.2, Code page: 1252, Name of Creating Application: Microsoft Excel, Create Time/Date: Thu Mar 19 21:34:27 2020, Last Saved Time/Date: Thu Mar 19 21:47:49 2020, Security: 0", + "mime": "application/vnd.ms-excel", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/document:win32:xlsx-order_764029.xlsm.json b/tests/testdata/document:win32:xlsx-order_764029.xlsm.json index bd05ff8..d1cbf5d 100644 --- a/tests/testdata/document:win32:xlsx-order_764029.xlsm.json +++ b/tests/testdata/document:win32:xlsx-order_764029.xlsm.json @@ -2,6 +2,8 @@ "headers": { "extension": "xlsx", "kind": "document", + "magic": "Microsoft Excel 2007+", + "mime": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/misc:ascii-D7niSVLx.json b/tests/testdata/misc:ascii-D7niSVLx.json index ad2f0d3..1420125 100644 --- a/tests/testdata/misc:ascii-D7niSVLx.json +++ b/tests/testdata/misc:ascii-D7niSVLx.json @@ -1,6 +1,8 @@ { "headers": { "kind": "ascii", + "magic": "ASCII text, with very long lines, with no line terminators", + "mime": "text/plain", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/misc:html-faktura_120207.json b/tests/testdata/misc:html-faktura_120207.json index 6660bba..321238e 100644 --- a/tests/testdata/misc:html-faktura_120207.json +++ b/tests/testdata/misc:html-faktura_120207.json @@ -1,6 +1,8 @@ { "headers": { "kind": "html", + "magic": "HTML document, ASCII text", + "mime": "text/html", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:android:dex-sample.json b/tests/testdata/runnable:android:dex-sample.json index 184d4c4..216778f 100644 --- a/tests/testdata/runnable:android:dex-sample.json +++ b/tests/testdata/runnable:android:dex-sample.json @@ -2,6 +2,8 @@ "headers": { "extension": "dex", "kind": "runnable", + "magic": "Dalvik dex file version 035", + "mime": "application/octet-stream", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:linux-bomba.spc.json b/tests/testdata/runnable:linux-bomba.spc.json index 9ebe535..f7dbf1f 100644 --- a/tests/testdata/runnable:linux-bomba.spc.json +++ b/tests/testdata/runnable:linux-bomba.spc.json @@ -1,6 +1,8 @@ { "headers": { "kind": "runnable", + "magic": "ELF 32-bit MSB executable, SPARC, version 1 (SYSV)", + "mime": "application/x-executable", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win32:dll-740000_5e637343801dcee0.json b/tests/testdata/runnable:win32:dll-740000_5e637343801dcee0.json index c23f877..d990b77 100644 --- a/tests/testdata/runnable:win32:dll-740000_5e637343801dcee0.json +++ b/tests/testdata/runnable:win32:dll-740000_5e637343801dcee0.json @@ -2,6 +2,8 @@ "headers": { "extension": "dll", "kind": "runnable", + "magic": "PE32 executable (DLL) (console) Intel 80386, for MS Windows", + "mime": "application/x-dosexec", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win32:exe-400000_6179792a2973254f.json b/tests/testdata/runnable:win32:exe-400000_6179792a2973254f.json index 808a18c..4ad37ca 100644 --- a/tests/testdata/runnable:win32:exe-400000_6179792a2973254f.json +++ b/tests/testdata/runnable:win32:exe-400000_6179792a2973254f.json @@ -2,6 +2,8 @@ "headers": { "extension": "exe", "kind": "runnable", + "magic": "PE32 executable (GUI) Intel 80386 Mono/.Net assembly, for MS Windows", + "mime": "application/x-dosexec", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win32:jar-None.json b/tests/testdata/runnable:win32:jar-None.json index e5e5b5a..cf526e6 100644 --- a/tests/testdata/runnable:win32:jar-None.json +++ b/tests/testdata/runnable:win32:jar-None.json @@ -2,6 +2,8 @@ "headers": { "extension": "jar", "kind": "runnable", + "magic": "Zip archive data, at least v1.0 to extract", + "mime": "application/zip", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json b/tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json index c2fa626..93761fe 100644 --- a/tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json +++ b/tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json @@ -2,6 +2,8 @@ "headers": { "extension": "lnk", "kind": "runnable", + "magic": "MS Windows shortcut, Item id list present, Has Description string, Has Relative path, Has Working directory, Has command line arguments, Icon number=67, ctime=Mon Jan 1 00:00:00 1601, mtime=Mon Jan 1 00:00:00 1601, atime=Mon Jan 1 00:00:00 1601, length=0, window=hidenormalshowminimized", + "mime": "application/octet-stream", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win32:msi-installer.json b/tests/testdata/runnable:win32:msi-installer.json index c99b7c4..2823650 100644 --- a/tests/testdata/runnable:win32:msi-installer.json +++ b/tests/testdata/runnable:win32:msi-installer.json @@ -2,6 +2,8 @@ "headers": { "extension": "msi", "kind": "runnable", + "magic": "Composite Document File V2 Document, Little Endian, Os: Windows, Version 6.1, MSI Installer, Code page: 1252, Last Printed: Fri Sep 21 09:56:09 2012, Create Time/Date: Fri Sep 21 09:56:09 2012, Name of Creating Application: Windows Installer, Title: Exe to msi converter free, Author: www.exetomsi.com, Template: ;0, Last Saved By: devuser, Revision Number: {C35CF0AA-9B3F-4903-9F05-EBF606D58D3E}, Last Saved Time/Date: Tue May 21 11:56:44 2013, Number of Pages: 100, Number of Words: 0, Security: 0", + "mime": "application/x-msi", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win32:swf-login.json b/tests/testdata/runnable:win32:swf-login.json index 6c04d00..0e4ef89 100644 --- a/tests/testdata/runnable:win32:swf-login.json +++ b/tests/testdata/runnable:win32:swf-login.json @@ -2,6 +2,8 @@ "headers": { "extension": "swf", "kind": "runnable", + "magic": "Macromedia Flash data (compressed), version 36", + "mime": "application/x-shockwave-flash", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win64:dll-isfb_x64.json b/tests/testdata/runnable:win64:dll-isfb_x64.json index bf86f2c..deaeccf 100644 --- a/tests/testdata/runnable:win64:dll-isfb_x64.json +++ b/tests/testdata/runnable:win64:dll-isfb_x64.json @@ -2,6 +2,8 @@ "headers": { "extension": "dll", "kind": "runnable", + "magic": "PE32+ executable (DLL) (GUI) x86-64, for MS Windows", + "mime": "application/x-dosexec", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/runnable:win64:exe-cpu64.dll.json b/tests/testdata/runnable:win64:exe-cpu64.dll.json index ea54602..511d947 100644 --- a/tests/testdata/runnable:win64:exe-cpu64.dll.json +++ b/tests/testdata/runnable:win64:exe-cpu64.dll.json @@ -2,6 +2,8 @@ "headers": { "extension": "exe", "kind": "runnable", + "magic": "PE32+ executable (console) x86-64, for MS Windows", + "mime": "application/x-dosexec", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/script:win32:js-dropper.json b/tests/testdata/script:win32:js-dropper.json index 9c54ec6..f063b04 100644 --- a/tests/testdata/script:win32:js-dropper.json +++ b/tests/testdata/script:win32:js-dropper.json @@ -2,6 +2,8 @@ "headers": { "extension": "js", "kind": "script", + "magic": "ASCII text, with very long lines, with CRLF line terminators", + "mime": "text/plain", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/script:win32:jse-js_script.json b/tests/testdata/script:win32:jse-js_script.json index a530ac7..b877ef3 100644 --- a/tests/testdata/script:win32:jse-js_script.json +++ b/tests/testdata/script:win32:jse-js_script.json @@ -2,6 +2,8 @@ "headers": { "extension": "jse", "kind": "script", + "magic": "data", + "mime": "application/octet-stream", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/script:win32:ps1-reminettance.ps1.json b/tests/testdata/script:win32:ps1-reminettance.ps1.json index e62b4f5..124e86b 100644 --- a/tests/testdata/script:win32:ps1-reminettance.ps1.json +++ b/tests/testdata/script:win32:ps1-reminettance.ps1.json @@ -2,6 +2,8 @@ "headers": { "extension": "ps1", "kind": "script", + "magic": "ASCII text, with very long lines", + "mime": "text/plain", "origin": "karton.classifier", "quality": "high", "stage": "recognized", diff --git a/tests/testdata/script:win32:vbs-wn4XZ9Xs.json b/tests/testdata/script:win32:vbs-wn4XZ9Xs.json index 3a8fa8b..1ad11a7 100644 --- a/tests/testdata/script:win32:vbs-wn4XZ9Xs.json +++ b/tests/testdata/script:win32:vbs-wn4XZ9Xs.json @@ -2,6 +2,8 @@ "headers": { "extension": "vbs", "kind": "script", + "magic": "ASCII text, with very long lines, with no line terminators", + "mime": "text/plain", "origin": "karton.classifier", "quality": "high", "stage": "recognized",