From 5c76a2d95fe2ba74c6c1a7a65e3dddca55969699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Mon, 19 Jun 2023 15:23:40 +0200 Subject: [PATCH 1/6] Remove typing cast that slowed the working of integer types (#95) --- malduck/ints.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/malduck/ints.py b/malduck/ints.py index 4b41d52..d49642c 100644 --- a/malduck/ints.py +++ b/malduck/ints.py @@ -1,6 +1,6 @@ from abc import ABCMeta, abstractmethod from struct import error, pack, unpack_from -from typing import Any, Callable, Generic, Iterator, Tuple, Type, TypeVar, Union, cast +from typing import Any, Generic, Iterator, Tuple, Type, TypeVar, Union from .bits import rol @@ -166,8 +166,7 @@ def __new__(cls: MetaIntType, value: Any) -> "IntType": value = int(value) & cls.mask if cls.signed: value |= -(value & cls.invert_mask) - construct = cast(Callable[[MetaIntType, Any], IntType], int.__new__) - return construct(cls, value) + return int.__new__(cls, value) # type: ignore def __add__(self, other: Any) -> "IntType": res = super().__add__(other) From ddd9af4fc2de7397fd57ca2889a9abcc3baef700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Mon, 19 Jun 2023 15:25:39 +0200 Subject: [PATCH 2/6] Remove strict pin from dnfile dependency (#93) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 393d3e0..1f910a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ capstone>=4.0.1 yara-python==4.2.3 typing-extensions>=3.7.4.2 cryptography>=3.1 -dnfile==0.11.0 +dnfile>=0.11.0 From 3540bd402c57c92c0fdcc82288b7563c3dd43ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Mon, 19 Jun 2023 15:29:26 +0200 Subject: [PATCH 3/6] Fix yara-python compatibility break (#94) --- malduck/yara.py | 42 +++++++++++++++++++++++++++--------------- malduck/yara.pyi | 2 +- requirements.txt | 2 +- 3 files changed, 29 
insertions(+), 17 deletions(-) diff --git a/malduck/yara.py b/malduck/yara.py index c4ac5bc..06986f3 100644 --- a/malduck/yara.py +++ b/malduck/yara.py @@ -259,25 +259,37 @@ def _map_matches(self, matches, offset_mapper): def _map_strings(self, strings, offset_mapper): mapped_strings = defaultdict(list) - for offset, identifier, content in strings: + for yara_string in strings: + # yara-python 4.3.0 broke compatibilty and started returning a StringMatch object + if type(yara_string) is tuple: + offsets = [yara_string[0]] + identifier = yara_string[1] + contents = [yara_string[2]] + else: + offsets = [x.offset for x in yara_string.instances] + identifier = yara_string.identifier + contents = [x.matched_data for x in yara_string.instances] + # Get identifier without "$" and group identifier real_ident, group_ident = self._parse_string_identifier(identifier) - # Map offset if offset_mapper is provided - if offset_mapper is not None: - _offset = offset_mapper(offset, len(content)) - if _offset is None: - # Ignore match for unmapped region - continue - offset = _offset - # Register offset for full identifier - mapped_strings[real_ident].append( - YaraStringMatch(real_ident, offset, content) - ) - # Register offset for grouped identifier - if real_ident != group_ident: - mapped_strings[group_ident].append( + + for offset, content in zip(offsets, contents): + # Map offset if offset_mapper is provided + if offset_mapper is not None: + _offset = offset_mapper(offset, len(content)) + if _offset is None: + # Ignore match for unmapped region + continue + offset = _offset + # Register offset for full identifier + mapped_strings[real_ident].append( YaraStringMatch(real_ident, offset, content) ) + # Register offset for grouped identifier + if real_ident != group_ident: + mapped_strings[group_ident].append( + YaraStringMatch(real_ident, offset, content) + ) return mapped_strings def _parse_string_identifier(self, identifier): diff --git a/malduck/yara.pyi b/malduck/yara.pyi index 
6d4124f..c3ea6d6 100644 --- a/malduck/yara.pyi +++ b/malduck/yara.pyi @@ -20,7 +20,7 @@ from typing_extensions import Literal, Protocol T = TypeVar("T") OffsetMapper = Callable[[Optional[int], Optional[int]], Optional[int]] -YaraRulesString = Tuple[int, str, bytes] +YaraRulesString = Union[Tuple[int, str, bytes], Any] class YaraRulesMatch(Protocol): meta: Dict[str, str] diff --git a/requirements.txt b/requirements.txt index 1f910a3..b3b9436 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ pefile>=2022.5.30 pyelftools pycryptodomex>=3.8.2 capstone>=4.0.1 -yara-python==4.2.3 +yara-python typing-extensions>=3.7.4.2 cryptography>=3.1 dnfile>=0.11.0 From 64fc8b76adcdcf070588fd401621bab2f92dcc6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Srokosz?= Date: Mon, 19 Jun 2023 15:51:29 +0200 Subject: [PATCH 4/6] Bump version: v4.3.2 (#98) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9d6cef9..a6cf862 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="malduck", - version="4.3.1", + version="4.3.2", description="Malduck is your ducky companion in malware analysis journeys", long_description=open("README.md").read(), long_description_content_type="text/markdown", From 98bd0daa6153d21829fc014a72df774a57bd0b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Srokosz?= Date: Wed, 26 Jul 2023 18:12:21 +0200 Subject: [PATCH 5/6] Bump Sphinx version and set correct docs version (#102) --- docs/conf.py | 2 +- docs/requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d873928..ba93e39 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'CERT Polska' # The full version, including alpha/beta/rc tags -version = '4.2.0' +version = '4.3.2' # -- General configuration --------------------------------------------------- diff --git a/docs/requirements.txt b/docs/requirements.txt index df0e23a..c9b4cf2 
100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -Sphinx==2.1.2 +Sphinx==6.2.1 sphinx-autodoc-annotation==1.0.post1 sphinx-autodoc-typehints==1.6.0 -sphinx-rtd-theme==0.5.0 +sphinx-rtd-theme==1.2.2 From 930a999287c13cbb80e66291ad7db4048dc22c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Srokosz?= Date: Wed, 26 Jul 2023 18:17:23 +0200 Subject: [PATCH 6/6] Refactored malduck.extractor (#100) --- docs/conf.py | 2 +- docs/extractor.rst | 2 +- malduck/extractor/__init__.py | 5 +- malduck/extractor/config_utils.py | 64 ++++++ malduck/extractor/extract_manager.py | 308 ++++++++++++--------------- malduck/extractor/extractor.pyi | 6 +- malduck/extractor/loaders.py | 63 ------ malduck/extractor/modules.py | 175 +++++++++++++++ malduck/procmem/binmem.py | 3 + malduck/procmem/procmem.py | 3 + setup.py | 2 +- 11 files changed, 390 insertions(+), 243 deletions(-) create mode 100644 malduck/extractor/config_utils.py delete mode 100644 malduck/extractor/loaders.py create mode 100644 malduck/extractor/modules.py diff --git a/docs/conf.py b/docs/conf.py index ba93e39..60f538d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'CERT Polska' # The full version, including alpha/beta/rc tags -version = '4.3.2' +version = '4.4.0' # -- General configuration --------------------------------------------------- diff --git a/docs/extractor.rst b/docs/extractor.rst index 9c2c3aa..7dddef3 100644 --- a/docs/extractor.rst +++ b/docs/extractor.rst @@ -17,5 +17,5 @@ Module interface Internally used classes and routines ------------------------------------ -.. autoclass:: malduck.extractor.extract_manager.ProcmemExtractManager +.. 
autoclass:: malduck.extractor.extract_manager.ExtractionContext :members: diff --git a/malduck/extractor/__init__.py b/malduck/extractor/__init__.py index f1920e1..e1575d8 100644 --- a/malduck/extractor/__init__.py +++ b/malduck/extractor/__init__.py @@ -1,4 +1,5 @@ -from .extract_manager import ExtractManager, ExtractorModules +from .extract_manager import ExtractManager from .extractor import Extractor +from .modules import ExtractorModules -__all__ = ["ExtractManager", "ExtractorModules", "Extractor"] +__all__ = ["ExtractManager", "Extractor", "ExtractorModules"] diff --git a/malduck/extractor/config_utils.py b/malduck/extractor/config_utils.py new file mode 100644 index 0000000..6d4b32b --- /dev/null +++ b/malduck/extractor/config_utils.py @@ -0,0 +1,64 @@ +import logging +from typing import Any, Dict + +log = logging.getLogger(__name__) + +Config = Dict[str, Any] +ConfigSet = Dict[str, Config] + + +def is_config_better(base_config: Config, new_config: Config) -> bool: + """ + Checks whether new config looks more reliable than base. + Currently just checking the amount of non-empty keys. + """ + base = [(k, v) for k, v in base_config.items() if v] + new = [(k, v) for k, v in new_config.items() if v] + return len(new) > len(base) + + +def encode_for_json(data: Any) -> Any: + if isinstance(data, bytes): + return data.decode() + elif isinstance(data, list) or isinstance(data, tuple): + return [encode_for_json(item) for item in data] + elif isinstance(data, dict): + return {key: encode_for_json(value) for key, value in data.items()} + else: + return data + + +def sanitize_config(config: Config) -> Config: + """ + Sanitize static configuration by removing empty strings/collections + + :param config: Configuration to sanitize + :return: Sanitized configuration + """ + return {k: v for k, v in config.items() if v in [0, False] or v} + + +def apply_config_part(base_config: Config, new_config_part: Config) -> Config: + """ + Apply new part of static configuration. 
Used internally. + + :param base_config: Base configuration + :param new_config_part: Changes to apply + :return: Merged configuration + """ + config = dict(base_config) + for k, v in new_config_part.items(): + if k not in config: + config[k] = v + elif config[k] == v: + continue + elif isinstance(config[k], list): + for el in v: + if el not in config[k]: + config[k] = config[k] + [el] + else: + raise RuntimeError( + f"Extractor tries to override '{config[k]}' " + f"value of '{k}' with '{v}'" + ) + return config diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 2bf70bf..af3f77a 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -1,121 +1,24 @@ import json import logging -import os import warnings -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Optional, Type -from ..procmem import ProcessMemory +from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE +from ..procmem.binmem import ProcessMemoryBinary from ..yara import Yara, YaraRuleOffsets, YaraRulesetMatch +from .config_utils import ( + Config, + apply_config_part, + encode_for_json, + is_config_better, + sanitize_config, +) from .extractor import Extractor -from .loaders import load_modules +from .modules import ExtractorModules log = logging.getLogger(__name__) -Config = Dict[str, Any] - -__all__ = ["ExtractManager", "ExtractorModules"] - - -def is_config_better(base_config: Config, new_config: Config) -> bool: - """ - Checks whether new config looks more reliable than base. - Currently just checking the amount of non-empty keys. 
- """ - base = [(k, v) for k, v in base_config.items() if v] - new = [(k, v) for k, v in new_config.items() if v] - return len(new) > len(base) - - -def encode_for_json(data: Any) -> Any: - if isinstance(data, bytes): - return data.decode() - elif isinstance(data, list) or isinstance(data, tuple): - return [encode_for_json(item) for item in data] - elif isinstance(data, dict): - return {key: encode_for_json(value) for key, value in data.items()} - else: - return data - - -def sanitize_config(config: Config) -> Config: - """ - Sanitize static configuration by removing empty strings/collections - - :param config: Configuration to sanitize - :return: Sanitized configuration - """ - return {k: v for k, v in config.items() if v in [0, False] or v} - - -def merge_configs(base_config: Config, new_config: Config) -> Config: - """ - Merge static configurations. - Used internally. Removes "family" key from the result, which is set explicitly by ExtractManager.push_config - - :param base_config: Base configuration - :param new_config: Changes to apply - :return: Merged configuration - """ - config = dict(base_config) - for k, v in new_config.items(): - if k == "family": - continue - if k not in config: - config[k] = v - elif config[k] == v: - continue - elif isinstance(config[k], list): - for el in v: - if el not in config[k]: - config[k] = config[k] + [el] - else: - raise RuntimeError( - f"Extractor tries to override '{config[k]}' " - f"value of '{k}' with '{v}'" - ) - return config - - -class ExtractorModules: - """ - Configuration object with loaded Extractor modules for ExtractManager - - :param modules_path: Path with module files (Extractor classes and Yara files, default '~/.malduck') - :type modules_path: str - """ - - def __init__(self, modules_path: Optional[str] = None) -> None: - if modules_path is None: - modules_path = os.path.join(os.path.expanduser("~"), ".malduck") - if not os.path.exists(modules_path): - os.makedirs(modules_path) - # Load Yara rules - 
self.rules: Yara = Yara.from_dir(modules_path) - # Preload modules - loaded_modules = load_modules(modules_path, onerror=self.on_error) - self.extractors: List[Type[Extractor]] = Extractor.__subclasses__() - - loaded_extractors = [x.__module__ for x in self.extractors] - - for module in loaded_modules.values(): - module_name = module.__name__ - if not any(x.startswith(module_name) for x in loaded_extractors): - warnings.warn( - f"The extractor engine couldn't import any Extractors from module {module_name}. Make sure the Extractor class is imported into __init__.py", - ) - - def on_error(self, exc: Exception, module_name: str) -> None: - """ - Handler for all exceptions raised during module load - - Override this method if you want to set your own error handler. - - :param exc: Exception object - :type exc: :class:`Exception` - :param module_name: Name of module which raised the exception - :type module_name: str - """ - log.warning("%s not loaded: %s", module_name, exc) +__all__ = ["ExtractManager"] class ExtractManager: @@ -128,6 +31,10 @@ class ExtractManager: def __init__(self, modules: ExtractorModules) -> None: self.modules = modules + self.binary_classes: List[Type[ProcessMemoryBinary]] = [ + ProcessMemoryPE, + ProcessMemoryELF, + ] self.configs: Dict[str, Config] = {} @property @@ -192,27 +99,97 @@ def push_file(self, filepath: str, base: int = 0) -> Optional[str]: :type filepath: str :param base: Memory dump base address :type base: int - :return: Family name if ripped successfully and provided better configuration than previous files. - Returns None otherwise. 
+ :return: Detected family if configuration looks better than already stored one """ log.debug("Started extraction of file %s:%x", filepath, base) with ProcessMemory.from_file(filepath, base=base) as p: return self.push_procmem(p, rip_binaries=True) - def push_config(self, family: str, config: Config) -> Optional[str]: - config["family"] = family - if family not in self.configs: - self.configs[family] = config - return family - else: - base_config = self.configs[family] - if is_config_better(base_config, config): - log.debug("Config looks better") + def match_procmem(self, p: ProcessMemory) -> YaraRulesetMatch: + """ + Performs Yara matching on ProcessMemory using modules + bound with current ExtractManager. + """ + matches = p.yarap(self.rules, extended=True) + log.debug("Matched rules: %s", ",".join(list(matches.keys()))) + return matches + + def carve_procmem(self, p: ProcessMemory) -> List[ProcessMemoryBinary]: + """ + Carves binaries from ProcessMemory to try configuration extraction + using every possible address mapping. 
+ """ + binaries = [] + for binclass in self.binary_classes: + carved_bins = list(binclass.load_binaries_from_memory(p)) + for carved_bin in carved_bins: + log.debug( + f"carve: Found {carved_bin.__class__.__name__} " + f"at offset {carved_bin.regions[0].offset}" + ) + binaries += carved_bins + return binaries + + def push_config(self, config: Config) -> bool: + if not config.get("family"): + return False + + family = config["family"] + if family in self.configs: + if is_config_better(base_config=self.configs[family], new_config=config): self.configs[family] = config - return family + log.debug("%s config looks better than previous one", family) + return True else: - log.debug("Config doesn't look better - ignoring.") - return None + log.debug("%s config doesn't look better than previous one", family) + return False + + if family in self.modules.override_paths: + # 'citadel' > 'zeus' + # If 'zeus' appears but we have already 'citadel', we should ignore 'zeus' + # Otherwise we should get 'citadel' instead of 'zeus' + for stored_family in self.configs.keys(): + if stored_family == family: + continue + score = self.modules.compare_family_overrides(family, stored_family) + if score == -1: + del self.configs[stored_family] + self.configs[family] = config + log.debug( + "%s config looks better (overrides %s)", family, stored_family + ) + return True + elif score == 1: + log.debug( + "%s config doesn't look better than previous one (overridden by %s)", + family, + stored_family, + ) + return False + + log.debug("New %s config collected", family) + self.configs[family] = config + return True + + def _extract_procmem(self, p: ProcessMemory, matches) -> Optional[str]: + log.debug("%s - ripping...", repr(p)) + # Create extraction context for single file + manager = ExtractionContext(parent=self) + # Map offset matches to VA using procmem address mapping + va_matches = matches.remap(p.p2v) + # Push ProcessMemory for extraction with mapped Yara matches + manager.push_procmem(p, 
_matches=va_matches) + # Get final configurations + config = manager.collected_config + if config.get("family"): + log.debug("%s - found %s!", repr(p), config.get("family")) + if self.push_config(config): + return config["family"] + else: + return None + else: + log.debug("%s - no luck.", repr(p)) + return None def push_procmem( self, p: ProcessMemory, rip_binaries: bool = False @@ -225,52 +202,22 @@ def push_procmem( :param rip_binaries: Look for binaries (PE, ELF) in provided ProcessMemory and try to perform extraction using specialized variants (ProcessMemoryPE, ProcessMemoryELF) :type rip_binaries: bool (default: False) - :return: Family name if ripped successfully and provided better configuration than previous procmems. - Returns None otherwise. + :return: Detected family if configuration looks better than already stored one """ - from ..procmem import ProcessMemoryELF, ProcessMemoryPE - from ..procmem.binmem import ProcessMemoryBinary - - matches = p.yarav(self.rules, extended=True) - + matches = self.match_procmem(p) if not matches: log.debug("No Yara matches.") return None - binaries: List[Union[ProcessMemory, ProcessMemoryBinary]] = [p] - if rip_binaries: - binaries += list(ProcessMemoryPE.load_binaries_from_memory(p)) - binaries += list(ProcessMemoryELF.load_binaries_from_memory(p)) - - def fmt_procmem(p: ProcessMemory) -> str: - procmem_type = "IMG" if getattr(p, "is_image", False) else "DMP" - return f"{p.__class__.__name__}:{procmem_type}:{p.imgbase:x}" - - def extract_config(procmem: ProcessMemory) -> Optional[str]: - log.debug("%s - ripping...", fmt_procmem(procmem)) - extractor = ProcmemExtractManager(self) - extractor.push_procmem(procmem, _matches=matches.remap(procmem.p2v)) - if extractor.family: - log.debug("%s - found %s!", fmt_procmem(procmem), extractor.family) - return self.push_config(extractor.family, extractor.config) - else: - log.debug("%s - No luck.", fmt_procmem(procmem)) - return None - - # 'list()' for prettier logs - 
log.debug("Matched rules: %s", list(matches.keys())) - - ripped_family = None + binaries = self.carve_procmem(p) if rip_binaries else [] + family = self._extract_procmem(p, matches) for binary in binaries: - found_family = extract_config(binary) - if found_family is not None: - ripped_family = found_family - if isinstance(binary, ProcessMemoryBinary) and binary.image is not None: - found_family = extract_config(binary.image) - if found_family is not None: - ripped_family = found_family - return ripped_family + family = self._extract_procmem(binary, matches) or family + binary_image = binary.image + if binary_image: + family = self._extract_procmem(binary_image, matches) or family + return family @property def config(self) -> List[Config]: @@ -280,7 +227,7 @@ def config(self) -> List[Config]: return [config for family, config in self.configs.items()] -class ProcmemExtractManager: +class ExtractionContext: """ Single-dump extraction context (single family) """ @@ -290,7 +237,11 @@ def __init__(self, parent: ExtractManager) -> None: self.collected_config: Config = {} self.globals: Dict[str, Any] = {} self.parent = parent #: Bound ExtractManager instance - self.family = None #: Matched family + + @property + def family(self) -> Optional[str]: + """Matched family""" + return self.collected_config.get("family") def on_extractor_error( self, exc: Exception, extractor: Extractor, method_name: str @@ -376,14 +327,27 @@ def push_config(self, config: Config, extractor: Extractor) -> None: sorted(config.keys()), ) - self.collected_config = merge_configs(self.collected_config, config) - - if "family" in config and ( - not self.family - or (self.family != extractor.family and self.family in extractor.overrides) - ): - self.family = config["family"] - log.debug("%s tells it's %s", extractor.__class__.__name__, self.family) + if "family" in config: + log.debug( + "%s tells it's %s", extractor.__class__.__name__, config["family"] + ) + if ( + "family" in self.collected_config + and 
self.collected_config["family"] != config["family"] + ): + overrides = self.parent.modules.compare_family_overrides( + config["family"], self.collected_config["family"] + ) + if not overrides: + raise RuntimeError( + f"Ripped both {self.collected_config['family']} and {config['family']} " + f"from the same ProcessMemory which is not expected" + ) + if overrides == -1: + self.collected_config["family"] = config["family"] + else: + config["family"] = self.collected_config["family"] + self.collected_config = apply_config_part(self.collected_config, config) @property def config(self) -> Config: diff --git a/malduck/extractor/extractor.pyi b/malduck/extractor/extractor.pyi index 56a05c4..3aff06e 100644 --- a/malduck/extractor/extractor.pyi +++ b/malduck/extractor/extractor.pyi @@ -18,7 +18,7 @@ from typing_extensions import Protocol from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE from ..yara import YaraRuleMatch, YaraStringMatch -from .extract_manager import ProcmemExtractManager +from .extract_manager import ExtractionContext Config = Dict[str, Any] @@ -95,8 +95,8 @@ class Extractor: yara_rules: Tuple[str, ...] family: Optional[str] overrides: List[str] - parent: ProcmemExtractManager - def __init__(self, parent: ProcmemExtractManager) -> None: ... + parent: ExtractionContext + def __init__(self, parent: ExtractionContext) -> None: ... def push_procmem(self, procmem: ProcessMemory, **info): ... def push_config(self, config): ... 
@property diff --git a/malduck/extractor/loaders.py b/malduck/extractor/loaders.py deleted file mode 100644 index 758ad75..0000000 --- a/malduck/extractor/loaders.py +++ /dev/null @@ -1,63 +0,0 @@ -import importlib.util -import logging -import pkgutil -import sys -from importlib.abc import FileLoader, PathEntryFinder -from typing import Any, Callable, Dict, Optional, cast - -log = logging.getLogger(__name__) - - -def import_module_by_finder(finder: PathEntryFinder, module_name: str) -> Any: - """ - Imports module from arbitrary path using importer returned by pkgutil.iter_modules - """ - if module_name in sys.modules: - return sys.modules[module_name] - - # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly - module_spec = finder.find_spec(module_name) - if module_spec is None or module_spec.loader is None: - raise Exception("Couldn't find module spec for %s", module_name) - module = importlib.util.module_from_spec(module_spec) - sys.modules[module_name] = module - try: - loader: FileLoader = cast(FileLoader, module_spec.loader) - loader.exec_module(module) - except BaseException: - del sys.modules[module_name] - raise - return module - - -def load_modules( - search_path: str, onerror: Optional[Callable[[Exception, str], None]] = None -) -> Dict[str, Any]: - """ - Loads plugin modules under specified paths - - .. note:: - - This method is considered to be used internally (see also :class:`extractor.ExtractorModules`) - - :param search_path: Path searched for modules - :type search_path: str - :param onerror: Exception handler (default: ignore exceptions) - :return: dict {name: module} - """ - modules: Dict[str, Any] = {} - for finder, module_name, is_pkg in pkgutil.iter_modules( - [search_path], "malduck.extractor.modules." 
- ): - if not is_pkg: - continue - if module_name in modules: - log.warning("Module collision - %s overridden", module_name) - try: - modules[module_name] = import_module_by_finder( - cast(PathEntryFinder, finder), module_name - ) - except Exception as exc: - if onerror: - onerror(exc, module_name) - return modules diff --git a/malduck/extractor/modules.py b/malduck/extractor/modules.py new file mode 100644 index 0000000..a602c22 --- /dev/null +++ b/malduck/extractor/modules.py @@ -0,0 +1,175 @@ +import importlib.util +import logging +import os +import pkgutil +import sys +import warnings +from collections import defaultdict +from importlib.abc import FileLoader, PathEntryFinder +from typing import Any, Callable, DefaultDict, Dict, List, Optional, Type, cast + +from ..yara import Yara +from .extractor import Extractor + +log = logging.getLogger(__name__) + + +class ExtractorModules: + """ + Configuration object with loaded Extractor modules for ExtractManager + + :param modules_path: Path with module files (Extractor classes and Yara files, default '~/.malduck') + :type modules_path: str + """ + + def __init__(self, modules_path: Optional[str] = None) -> None: + if modules_path is None: + modules_path = os.path.join(os.path.expanduser("~"), ".malduck") + if not os.path.exists(modules_path): + os.makedirs(modules_path) + # Load Yara rules + self.rules: Yara = Yara.from_dir(modules_path) + # Preload modules + loaded_modules = load_modules(modules_path, onerror=self.on_error) + self.extractors: List[Type[Extractor]] = Extractor.__subclasses__() + + loaded_extractors = [x.__module__ for x in self.extractors] + + for module in loaded_modules.values(): + module_name = module.__name__ + if not any(x.startswith(module_name) for x in loaded_extractors): + warnings.warn( + f"The extractor engine couldn't import any Extractors from module {module_name}. 
" + f"Make sure the Extractor class is imported into __init__.py", + ) + self.override_paths = make_override_paths(self.extractors) + + def on_error(self, exc: Exception, module_name: str) -> None: + """ + Handler for all exceptions raised during module load + + Override this method if you want to set your own error handler. + + :param exc: Exception object + :type exc: :class:`Exception` + :param module_name: Name of module which raised the exception + :type module_name: str + """ + log.warning("%s not loaded: %s", module_name, exc) + + def compare_family_overrides(self, first: str, second: str) -> int: + """ + Checks which family supersedes which. Relations can be transitive, + so ExtractorModules builds all possible paths and checks the order. + If there is no such relationship between families, function returns None. + """ + if first not in self.override_paths or second not in self.override_paths: + return 0 + for path in self.override_paths[first]: + try: + if path.index(first) < path.index(second): + return -1 + else: + return 1 + except ValueError: + pass + return 0 + + +def make_override_paths(extractors: List[Type[Extractor]]) -> Dict[str, List[str]]: + # Make override trees and get roots + overrides: DefaultDict[str, List[str]] = defaultdict(list) + parents = set() + children = set() + for extractor in extractors: + if extractor.family is None: + continue + for overridden_family in extractor.overrides: + overrides[extractor.family].append(overridden_family) + parents.add(extractor.family) + children.add(overridden_family) + roots = parents.difference(children) + unvisited = parents.union(children) + # Perform DFS and collect all override paths + override_paths = defaultdict(list) + + def make_override_path(node, visited, current_path=None): + if node in visited: + raise RuntimeError( + f"Override cycle detected: {node} already visited during tree traversal" + ) + visited.add(node) + unvisited.remove(node) + current_path = [*(current_path or []), node] + 
if not overrides[node]: + # Leaf: override path is complete + for family in current_path: + override_paths[family].append(current_path) + else: + # Not a leaf: go deeper + for family in overrides[node]: + make_override_path(family, visited=visited, current_path=current_path) + + for root in roots: + make_override_path(root, visited=set()) + # Root undetected + if unvisited: + raise RuntimeError( + f"Override cycle detected: {list(unvisited)} not visited during tree traversal" + ) + return dict(override_paths) + + +def import_module_by_finder(finder: PathEntryFinder, module_name: str) -> Any: + """ + Imports module from arbitrary path using importer returned by pkgutil.iter_modules + """ + if module_name in sys.modules: + return sys.modules[module_name] + + # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly + module_spec = finder.find_spec(module_name) + if module_spec is None or module_spec.loader is None: + raise Exception("Couldn't find module spec for %s", module_name) + module = importlib.util.module_from_spec(module_spec) + sys.modules[module_name] = module + try: + loader: FileLoader = cast(FileLoader, module_spec.loader) + loader.exec_module(module) + except BaseException: + del sys.modules[module_name] + raise + return module + + +def load_modules( + search_path: str, onerror: Optional[Callable[[Exception, str], None]] = None +) -> Dict[str, Any]: + """ + Loads plugin modules under specified paths + + .. note:: + + This method is considered to be used internally (see also :class:`extractor.ExtractorModules`) + + :param search_path: Path searched for modules + :type search_path: str + :param onerror: Exception handler (default: ignore exceptions) + :return: dict {name: module} + """ + modules: Dict[str, Any] = {} + for finder, module_name, is_pkg in pkgutil.iter_modules( + [search_path], "malduck.extractor.modules." 
+ ): + if not is_pkg: + continue + if module_name in modules: + log.warning("Module collision - %s overridden", module_name) + try: + modules[module_name] = import_module_by_finder( + cast(PathEntryFinder, finder), module_name + ) + except Exception as exc: + if onerror: + onerror(exc, module_name) + return modules diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index 505c763..668fb47 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -86,3 +86,6 @@ def is_image_loaded_as_memdump(self) -> bool: Used by `detect_image` """ raise NotImplementedError() + + def __repr__(self): + return f"{self.__class__.__name__}:{'IMG' if self.is_image else 'DMP'}:{hex(self.imgbase)[2:]}" diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index 7e79748..36791cf 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -895,5 +895,8 @@ def findmz(self, addr): return addr addr -= 0x1000 + def __repr__(self): + return f"{self.__class__.__name__}:DMP:{hex(self.imgbase)[2:]}" + procmem = ProcessMemory diff --git a/setup.py b/setup.py index a6cf862..d4c4f68 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="malduck", - version="4.3.2", + version="4.4.0", description="Malduck is your ducky companion in malware analysis journeys", long_description=open("README.md").read(), long_description_content_type="text/markdown",