From 9b59664d9bef1e3ebf7a4307f3ec061f865efce7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arkadiusz=20Wr=C3=B3bel?= Date: Wed, 8 Jan 2020 16:47:23 +0100 Subject: [PATCH] Porting modules py3 --- malduck/extractor/extract_manager.py | 18 ++++++++++++++++-- malduck/main.py | 5 ++--- malduck/procmem/procmem.py | 12 +++++++++--- malduck/py2compat.py | 15 ++++++++++----- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 2a75f98..99189e0 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -4,6 +4,7 @@ from .extractor import Extractor from .loaders import load_modules +from ..py2compat import binary_type from ..yara import Yara log = logging.getLogger(__name__) @@ -19,6 +20,17 @@ def is_config_better(base_config, new_config): return len(new) > len(base) +def encode_for_json(data): + if isinstance(data, binary_type): + return data.decode('utf-8') + elif isinstance(data, list): + return [encode_for_json(item) for item in data] + elif isinstance(data, dict): + return {key: encode_for_json(value) for key, value in data.items()} + else: + return data + + def sanitize_config(config): """ Sanitize static configuration by removing empty strings/collections @@ -219,7 +231,7 @@ def extract_config(procmem): else: log.debug("{} - No luck.".format(fmt_procmem(procmem))) - log.debug("Matched rules: {}".format(matches.keys())) + log.debug("Matched rules: {}".format(list(matches.keys()))) # 'list()' for prettier logs ripped_family = None @@ -298,9 +310,11 @@ def push_config(self, config, extractor): :param extractor: Extractor object reference :type extractor: :class:`malduck.extractor.Extractor` """ + config = encode_for_json(config) try: json.dumps(config) - except (TypeError, OverflowError): + except (TypeError, OverflowError) as e: + log.debug("Config is not JSON-encodable ({}): {}".format(str(e), repr(config))) raise RuntimeError("Config must be JSON-encodable") config = sanitize_config(config) diff --git a/malduck/main.py b/malduck/main.py index 3a02278..480b9fb 100644 --- a/malduck/main.py +++ b/malduck/main.py @@ -60,7 +60,7 @@ def echo_config(extract_manager, file_path=None): else "[+] Ripped '{family}' configuration:" ) .format(family=config["family"], file_path=file_path) click.echo(message, err=True) - click.echo(json.dumps(config, indent=4)) + click.echo(json.dumps(config, indent=4, sort_keys=True)) if base is None: base = 0 @@ -82,9 +82,8 @@ def echo_config(extract_manager, file_path=None): else: files = [] click.echo("[!] Symbolic links are not supported, {} ignored.".format(path), err=True) - files.sort() - for file_path in files: + for file_path in sorted(files): extract_manager.push_file(file_path, base=base) if not analysis: echo_config(extract_manager, file_path) diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index 1d5c1ab..4503fd0 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -6,7 +6,7 @@ from ..disasm import disasm from ..string.bin import uint8, uint16, uint32, uint64, int8, int16, int32, int64 from ..string.ops import utf16z -from ..py2compat import ensure_bytes, ensure_string, binary_type +from ..py2compat import is_binary, ensure_string, binary_type class ProcessMemory(object): @@ -572,7 +572,10 @@ def regexp(self, query, offset=0, length=None): :rtype: Iterator[int] """ chunk = self.readp(offset, length) - query = ensure_bytes(query) + if not is_binary(query): + # Can't just encode the string. + # E.g. '\xf7'.encode('utf-8') would be encoded to b'\xc3\xb7' instead of b'\xf7'. + raise TypeError("Query argument must be binary type (bytes)") for entry in re.finditer(query, chunk, re.DOTALL): yield offset + entry.start() @@ -593,7 +596,10 @@ def regexv(self, query, addr=None, length=None): Method doesn't match bytes overlapping the border between regions """ - query = ensure_bytes(query) + if not is_binary(query): + # Can't just encode the string. + # E.g. '\xf7'.encode('utf-8') would be encoded to b'\xc3\xb7' instead of b'\xf7'. + raise TypeError("Query argument must be binary type (bytes)") for chunk_addr, chunk in self.readv_regions(addr, length, contiguous=False): for entry in re.finditer(query, chunk, re.DOTALL): yield chunk_addr + entry.start() diff --git a/malduck/py2compat.py b/malduck/py2compat.py index cc91216..f208eb9 100644 --- a/malduck/py2compat.py +++ b/malduck/py2compat.py @@ -26,18 +26,23 @@ def iterbytes(b): def ensure_bytes(v): """ - Py2: str -> str, unicode -> str - Py3: str -> bytes + Py2: str -> str; unicode -> str + Py3: bytes -> bytes; str -> bytes """ return v.encode("utf8") if not isinstance(v, binary_type) else v def ensure_string(v): """ - Py2: str -> str - Py3: bytes -> str + Py2: str -> str; unicode -> unicode + Py3: bytes -> str; str -> str """ - return v.decode("utf8") if PY3 and isinstance(v, binary_type) else v + if PY3 and isinstance(v, binary_type): + return v.decode("utf8") + elif isinstance(v, string_types): + return v + else: + raise TypeError('v should be str/unicode/bytes instead of ' + str(type(v))) def import_module(importer, module_name):