Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for embedded Yara rules #115

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions malduck/extractor/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
from typing import List, cast

import yara

from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,6 +79,7 @@ class Extractor:

* :py:attr:`family` (see :py:attr:`extractor.Extractor.family`)
* :py:attr:`yara_rules`
* :py:attr:`yara_source`
* :py:attr:`overrides` (optional, see :py:attr:`extractor.Extractor.overrides`)

Example extractor code for Citadel:
Expand Down Expand Up @@ -115,6 +118,31 @@ def cit_login(self, p, addr, match):
- `@Extractor.rule` methods
- `@Extractor.final` methods

.. py:decoratormethod:: Extractor.yara

Decorator for extractor classes to embed Yara rules and compute the `yara_rules` property.

The example above can embed the rule as follows:

.. code-block:: Python

from malduck import Extractor

@Extractor.yara(r\"\"\"
rule possible_citadel {
strings:
$briankerbs = ...
$cit_login = ...
condition:
all of them
}
\"\"\")
class Citadel(Extractor):
family = "citadel"
overrides = ("zeus",)

...

.. py:decoratormethod:: Extractor.string

Decorator for string-based extractor methods.
Expand Down Expand Up @@ -327,6 +355,7 @@ def is_it_really_evil(self, p):
"""

yara_rules = () #: Names of Yara rules for which handle_match is called
yara_source = None
family = None #: Extracted malware family, automatically added to "family" key for strong extraction methods
overrides = [] #: Family match overrides another match e.g. citadel overrides zeus

Expand Down Expand Up @@ -572,3 +601,19 @@ def weak(method):
)
method.weak = True
return method

@staticmethod
def yara(source):
if not isinstance(source, str):
raise TypeError("Expected string argument")

def modifier(extractor):
if not issubclass(extractor, Extractor):
raise TypeError("Expected Extractor argument")
extractor.yara_source = source
extractor.yara_rules = [
rule.identifier for rule in yara.compile(source=source)
]
return extractor

return modifier
3 changes: 3 additions & 0 deletions malduck/extractor/extractor.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class FinalExtractorMethod(ExtractorMethod[T, U]):

class Extractor:
yara_rules: Tuple[str, ...]
yara_source: Optional[str]
family: Optional[str]
overrides: List[str]
parent: ExtractionContext
Expand Down Expand Up @@ -151,3 +152,5 @@ class Extractor:
) -> ExtractorMethod[T, ProcessMemoryELF]: ...
@staticmethod
def weak(method: ExtractorMethod[T, U]) -> ExtractorMethod[T, U]: ...
# NOTE(review): the runtime decorator validates its argument with issubclass(),
# i.e. it receives and returns an Extractor *class*. The annotation below describes
# instances; it presumably should be Callable[[Type[Extractor]], Type[Extractor]] —
# confirm that Type is imported in this stub before changing it.
@staticmethod
def yara(source: str) -> Callable[[Extractor], Extractor]: ...
21 changes: 16 additions & 5 deletions malduck/extractor/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,16 @@ class ExtractorModules:

:param modules_path: Path with module files (Extractor classes and Yara files, default '~/.malduck')
:type modules_path: str
:type modules_path: list
"""

def __init__(self, modules_path: Optional[str] = None) -> None:
def __init__(
self,
modules_path: Optional[str] = None,
) -> None:
if modules_path is None:
modules_path = os.path.join(os.path.expanduser("~"), ".malduck")
if not os.path.exists(modules_path):
os.makedirs(modules_path)
# Load Yara rules
self.rules: Yara = Yara.from_dir(modules_path)

# Preload modules
loaded_modules = load_modules(modules_path, onerror=self.on_error)
self.extractors: List[Type[Extractor]] = Extractor.__subclasses__()
Expand All @@ -44,6 +45,16 @@ def __init__(self, modules_path: Optional[str] = None) -> None:
)
self.override_paths = make_override_paths(self.extractors)

# Load Yara rules
self.rules: Yara = Yara.from_dir_and_sources(
path=modules_path,
sources={
extractor.family: extractor.yara_source
for extractor in self.extractors
if extractor.yara_source and extractor.family
},
)

def on_error(self, exc: Exception, module_name: str) -> None:
"""
Handler for all exceptions raised during module load
Expand Down
81 changes: 62 additions & 19 deletions malduck/yara.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,25 @@ class Yara:
:type strings: dict or str or :class:`YaraString`
:param condition: Yara rule condition (default: "any of them")
:type condition: str
:param sources: Dictionary of {"namespace": "rule_source"}. See also :py:meth:`Yara.from_sources`.
:type sources: dict
"""

def __init__(
self, rule_paths=None, name="r", strings=None, condition="any of them"
self,
rule_paths=None,
name="r",
strings=None,
condition="any of them",
sources=None,
):
if rule_paths:
self.rules = yara.compile(filepaths=rule_paths)
if rule_paths or sources:
if not sources:
sources = {}
for namespace in rule_paths:
with open(rule_paths[namespace], "r") as source:
sources[namespace] = source.read()
self.rules = yara.compile(sources=sources)
return

if not strings:
Expand Down Expand Up @@ -143,7 +155,7 @@ def __init__(
self.rules = yara.compile(source=yara_source)

@staticmethod
def from_dir_and_sources(path=None, recursive=True, followlinks=True, sources=None):
    """
    Build a ruleset from Yara files found in a directory and/or from in-memory rule sources.

    Files are searched (recursively) in specified path. Supported extensions: \\*.yar, \\*.yara.
    The namespace of a file-based ruleset is its file name without the extension.

    :param path: Root path for searching (optional, no files are collected when not provided)
    :type path: str
    :param recursive: Search recursively (default: enabled)
    :type recursive: bool
    :param followlinks: Follow symbolic links (default: enabled)
    :type followlinks: bool
    :param sources: Dictionary of {"namespace": "rule_source"}
    :type sources: dict
    :rtype: :class:`Yara`
    """
    rule_paths: Dict[str, str] = {}
    if path:
        for root, _, files in os.walk(path, followlinks=followlinks):
            for fname in files:
                if not fname.endswith((".yar", ".yara")):
                    continue
                # os.walk yields bare file names, so only the extension is stripped
                ruleset_name = os.path.splitext(fname)[0]
                ruleset_path = os.path.join(root, fname)
                if ruleset_name in rule_paths:
                    log.warning(
                        f"Yara file name collision - {rule_paths[ruleset_name]} "
                        f"overridden by {ruleset_path}"
                    )
                elif sources and ruleset_name in sources:
                    # The constructor stores file contents under the same
                    # namespace key, replacing the in-memory source.
                    log.warning(
                        f"Yara namespace collision - source '{ruleset_name}' "
                        f"overridden by {ruleset_path}"
                    )
                rule_paths[ruleset_name] = ruleset_path
            if not recursive:
                # Only the first os.walk entry (the root directory) is wanted
                break
    # The constructor adds file contents to the mapping it is given, so hand
    # over a copy and leave the caller's dictionary untouched.
    return Yara(rule_paths=rule_paths, sources=dict(sources) if sources else sources)

@staticmethod
def from_dir(path, recursive=True, followlinks=True):
    """
    Build a ruleset from Yara files (\\*.yar, \\*.yara) located under the specified path.

    Delegates to :py:meth:`Yara.from_dir_and_sources` without passing any rule sources.

    :param path: Root path for searching
    :type path: str
    :param recursive: Search recursively (default: enabled)
    :type recursive: bool
    :param followlinks: Follow symbolic links (default: enabled)
    :type followlinks: bool
    :rtype: :class:`Yara`
    """
    walk_options = {"recursive": recursive, "followlinks": followlinks}
    return Yara.from_dir_and_sources(path, **walk_options)

@staticmethod
def from_sources(sources):
    """
    Build a ruleset from in-memory rule sources only (no directory is searched).

    Delegates to :py:meth:`Yara.from_dir_and_sources` without passing a path.

    :param sources: Dictionary of {"namespace": "rule_source"}
    :type sources: dict
    :rtype: :class:`Yara`
    """
    ruleset = Yara.from_dir_and_sources(sources=sources)
    return ruleset

def match(self, offset_mapper=None, extended=False, **kwargs):
"""
Expand Down
10 changes: 10 additions & 0 deletions malduck/yara.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,21 @@ class Yara:
str, "YaraString", Dict[str, Union[str, "YaraString"]], None
] = None,
condition: str = "any of them",
sources: Optional[Dict[str, str]] = None,
) -> None: ...
@staticmethod
def from_dir_and_sources(
path: Optional[str] = None,
recursive: bool = True,
followlinks: bool = True,
sources: Optional[Dict[str, str]] = None,
) -> "Yara": ...
@staticmethod
def from_dir(
path: str, recursive: bool = True, followlinks: bool = True
) -> "Yara": ...
@staticmethod
def from_sources(sources: Dict[str, str]) -> "Yara": ...
# match(...)
# match(offset_mapper, ...)
# match(offset_mapper, extended=False, ...)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
name="malduck",
version="4.4.0",
description="Malduck is your ducky companion in malware analysis journeys",
long_description=open("README.md").read(),
long_description=open("README.md", encoding="utf8").read(),
long_description_content_type="text/markdown",
author="CERT Polska",
author_email="info@cert.pl",
Expand All @@ -20,7 +20,7 @@
},
license="GPLv3",
include_package_data=True,
install_requires=open("requirements.txt").read().splitlines(),
install_requires=open("requirements.txt", encoding="utf8").read().splitlines(),
url="https://github.com/CERT-Polska/malduck",
classifiers=[
"Programming Language :: Python :: 3",
Expand Down
1 change: 1 addition & 0 deletions tests/files/embedded.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Start with this and nothing else...
1 change: 1 addition & 0 deletions tests/files/modules/embedded/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .embedded import Embedded
18 changes: 18 additions & 0 deletions tests/files/modules/embedded/embedded.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from malduck.extractor import Extractor
from malduck import base64, procmempe

@Extractor.yara(r"""
rule embedded_test
{
strings:
$start = "Start with this and nothing else..."
condition:
all of them and $start at 0
}
""")
class Embedded(Extractor):
    """Test extractor whose Yara rule is embedded through ``Extractor.yara``."""

    family = "embedded"

    @Extractor.final
    def embedded(self, p):
        """Return a fixed ``{"embedded": True}`` config entry."""
        return dict(embedded=True)
9 changes: 9 additions & 0 deletions tests/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,12 @@ def test_multirules():
'matched': ['v2'],
'third': ['ThIrD string']
}]


def test_embedded():
    """Extraction with a module that embeds its Yara rule yields the expected config."""
    extractor_modules = ExtractorModules("tests/files/modules")
    sample = procmem.from_file("tests/files/embedded.txt")
    expected = [{"embedded": True, "family": "embedded"}]
    assert sample.extract(extractor_modules) == expected