Skip to content

Commit

Permalink
return magic description and mime type
Browse files (browse the repository at this point in the history)
  • Loading branch information
conitrade-as committed Apr 26, 2021
1 parent 2b5cc0d commit 0e9a351
Show file tree
Hide file tree
Showing 36 changed files with 74 additions and 0 deletions.
4 changes: 4 additions & 0 deletions karton/classifier/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,10 @@ def _classify(self, task: Task) -> Optional[Dict[str, str]]:
content = cast(bytes, sample.content)

magic = task.get_payload("magic") or ""
magic_mime = task.get_payload("mime") or ""
try:
magic = pymagic.from_buffer(content)
magic_mime = pymagic.from_buffer(content, mime=True)
except Exception as ex:
self.log.warning(f"unable to get magic: {ex}")

Expand All @@ -126,6 +128,8 @@ def _classify(self, task: Task) -> Optional[Dict[str, str]]:
"type": "sample",
"stage": "recognized",
"quality": task.headers.get("quality", "high"),
"magic": magic if magic else None,
"mime": magic_mime if magic_mime else None,
}

# Is PE file?
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:7z-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "7z",
"kind": "archive",
"magic": "7-zip archive data, version 0.4",
"mime": "application/x-7z-compressed",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:ace-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "ace",
"kind": "archive",
"magic": "ACE archive data version 20, from Win/32, version 20 to extract, contains AV-String (unregistered), solid",
"mime": "application/octet-stream",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:bz2-sample.bz2.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "bz2",
"kind": "archive",
"magic": "bzip2 compressed data, block size = 900k",
"mime": "application/x-bzip2",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:cab-sample.cab.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "cab",
"kind": "archive",
"magic": "Microsoft Cabinet archive data, Windows 2000/XP setup, 235156 bytes, 1 file, at 0x2c +A \"RFQ and Company Profile_PDF.exe\", number 1, 12 datablocks, 0x1503 compression",
"mime": "application/vnd.ms-cab-compressed",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:cab-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "cab",
"kind": "archive",
"magic": "Microsoft Cabinet archive data, Windows 2000/XP setup, 5099 bytes, 1 file, at 0x2c +A \"360se.ini\", number 1, 1 datablock, 0x1 compression",
"mime": "application/vnd.ms-cab-compressed",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:gz-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "gz",
"kind": "archive",
"magic": "gzip compressed data, was \"Order 002_PDF.exe\", last modified: Thu Apr 30 23:25:26 2020, from FAT filesystem (MS-DOS, OS/2, NT)",
"mime": "application/gzip",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:iso-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "iso",
"kind": "archive",
"magic": "ISO 9660 CD-ROM filesystem data 'DHL Shipping Document (Please Si'",
"mime": "application/x-iso9660-image",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:lz-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "lz",
"kind": "archive",
"magic": "lzip compressed data, version: 1",
"mime": "application/x-lzip",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:rar-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "rar",
"kind": "archive",
"magic": "RAR archive data, v5",
"mime": "application/x-rar",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:tar-sample.tar.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "tar",
"kind": "archive",
"magic": "POSIX tar archive",
"mime": "application/x-tar",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:udf-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "udf",
"kind": "archive",
"magic": "UDF filesystem data (version 1.5) '06_25_2020'",
"mime": "application/x-iso9660-image",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:xz-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "xz",
"kind": "archive",
"magic": "XZ compressed data",
"mime": "application/x-xz",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/archive:zip-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "zip",
"kind": "archive",
"magic": "Zip archive data, at least v2.0 to extract",
"mime": "application/zip",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/document:win32:doc-12a_a.doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "doc",
"kind": "document",
"magic": "Composite Document File V2 Document, Little Endian, Os: Windows, Version 6.1, Code page: 1251, Title: , Template: Normal, Last Saved By: Z, Revision Number: 5, Name of Creating Application: Microsoft Office Word, Total Editing Time: 01:33:00, Last Printed: Sat Nov 5 19:25:00 2016, Create Time/Date: Sun Oct 30 16:29:00 2016, Last Saved Time/Date: Sat Nov 5 19:28:00 2016, Number of Pages: 9, Number of Words: 1800, Number of Characters: 10264, Security: 0",
"mime": "application/msword",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/document:win32:docx-EngWhiteKen3081.doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "docx",
"kind": "document",
"magic": "Microsoft Word 2007+",
"mime": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/document:win32:pdf-Doc-6235300-202005.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "pdf",
"kind": "document",
"magic": "PDF document, version 1.4",
"mime": "application/pdf",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/document:win32:rtf-document.doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "rtf",
"kind": "document",
"magic": "Rich Text Format data, version 1, unknown character set",
"mime": "text/rtf",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/document:win32:xls-faktura_82.xls.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "xls",
"kind": "document",
"magic": "Composite Document File V2 Document, Little Endian, Os: Windows, Version 6.2, Code page: 1252, Name of Creating Application: Microsoft Excel, Create Time/Date: Thu Mar 19 21:34:27 2020, Last Saved Time/Date: Thu Mar 19 21:47:49 2020, Security: 0",
"mime": "application/vnd.ms-excel",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/document:win32:xlsx-order_764029.xlsm.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "xlsx",
"kind": "document",
"magic": "Microsoft Excel 2007+",
"mime": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/misc:ascii-D7niSVLx.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{
"headers": {
"kind": "ascii",
"magic": "ASCII text, with very long lines, with no line terminators",
"mime": "text/plain",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/misc:html-faktura_120207.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{
"headers": {
"kind": "html",
"magic": "HTML document, ASCII text",
"mime": "text/html",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:android:dex-sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "dex",
"kind": "runnable",
"magic": "Dalvik dex file version 035",
"mime": "application/octet-stream",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:linux-bomba.spc.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{
"headers": {
"kind": "runnable",
"magic": "ELF 32-bit MSB executable, SPARC, version 1 (SYSV)",
"mime": "application/x-executable",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "dll",
"kind": "runnable",
"magic": "PE32 executable (DLL) (console) Intel 80386, for MS Windows",
"mime": "application/x-dosexec",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "exe",
"kind": "runnable",
"magic": "PE32 executable (GUI) Intel 80386 Mono/.Net assembly, for MS Windows",
"mime": "application/x-dosexec",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:win32:jar-None.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "jar",
"kind": "runnable",
"magic": "Zip archive data, at least v1.0 to extract",
"mime": "application/zip",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:win32:lnk-PO_4700040755.jpg.lnk.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "lnk",
"kind": "runnable",
"magic": "MS Windows shortcut, Item id list present, Has Description string, Has Relative path, Has Working directory, Has command line arguments, Icon number=67, ctime=Mon Jan 1 00:00:00 1601, mtime=Mon Jan 1 00:00:00 1601, atime=Mon Jan 1 00:00:00 1601, length=0, window=hidenormalshowminimized",
"mime": "application/octet-stream",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:win32:msi-installer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "msi",
"kind": "runnable",
"magic": "Composite Document File V2 Document, Little Endian, Os: Windows, Version 6.1, MSI Installer, Code page: 1252, Last Printed: Fri Sep 21 09:56:09 2012, Create Time/Date: Fri Sep 21 09:56:09 2012, Name of Creating Application: Windows Installer, Title: Exe to msi converter free, Author: www.exetomsi.com, Template: ;0, Last Saved By: devuser, Revision Number: {C35CF0AA-9B3F-4903-9F05-EBF606D58D3E}, Last Saved Time/Date: Tue May 21 11:56:44 2013, Number of Pages: 100, Number of Words: 0, Security: 0",
"mime": "application/x-msi",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:win32:swf-login.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "swf",
"kind": "runnable",
"magic": "Macromedia Flash data (compressed), version 36",
"mime": "application/x-shockwave-flash",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:win64:dll-isfb_x64.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "dll",
"kind": "runnable",
"magic": "PE32+ executable (DLL) (GUI) x86-64, for MS Windows",
"mime": "application/x-dosexec",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/runnable:win64:exe-cpu64.dll.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "exe",
"kind": "runnable",
"magic": "PE32+ executable (console) x86-64, for MS Windows",
"mime": "application/x-dosexec",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/script:win32:js-dropper.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "js",
"kind": "script",
"magic": "ASCII text, with very long lines, with CRLF line terminators",
"mime": "text/plain",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/script:win32:jse-js_script.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "jse",
"kind": "script",
"magic": "data",
"mime": "application/octet-stream",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/script:win32:ps1-reminettance.ps1.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "ps1",
"kind": "script",
"magic": "ASCII text, with very long lines",
"mime": "text/plain",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down
2 changes: 2 additions & 0 deletions tests/testdata/script:win32:vbs-wn4XZ9Xs.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"headers": {
"extension": "vbs",
"kind": "script",
"magic": "ASCII text, with very long lines, with no line terminators",
"mime": "text/plain",
"origin": "karton.classifier",
"quality": "high",
"stage": "recognized",
Expand Down

0 comments on commit 0e9a351

Please sign in to comment.