Skip to content

Commit

Permalink
Merge branch 'porting-modules-py3' into 'master'
Browse files Browse the repository at this point in the history
Porting modules py3

See merge request mlwr/malduck!57
  • Loading branch information
catsuryuu committed Jan 8, 2020
2 parents 8f4a904 + 9b59664 commit 4d165ba
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 13 deletions.
18 changes: 16 additions & 2 deletions malduck/extractor/extract_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from .extractor import Extractor
from .loaders import load_modules
from ..py2compat import binary_type
from ..yara import Yara

log = logging.getLogger(__name__)
Expand All @@ -19,6 +20,17 @@ def is_config_better(base_config, new_config):
return len(new) > len(base)


def encode_for_json(data):
    """
    Recursively decode binary strings so the result can be given to json.dumps.

    Binary values (``binary_type``) are decoded as UTF-8. Lists, tuples and
    dicts are walked recursively and rebuilt; any other value is returned
    unchanged. Binary dict keys are decoded as well, because ``json.dumps``
    rejects ``bytes`` keys on Python 3.

    :param data: Value to convert (scalar, list, tuple or dict)
    :return: Copy of ``data`` with every binary string replaced by text
    :raises UnicodeDecodeError: if a binary string is not valid UTF-8
    """
    if isinstance(data, binary_type):
        return data.decode('utf-8')
    if isinstance(data, list):
        return [encode_for_json(item) for item in data]
    if isinstance(data, tuple):
        # json.dumps serializes tuples like lists, so bytes nested in them
        # have to be decoded too; the container type is kept as a tuple.
        return tuple(encode_for_json(item) for item in data)
    if isinstance(data, dict):
        # Only binary keys are converted; other key types are left alone so
        # they stay hashable and keep their meaning.
        return {
            (key.decode('utf-8') if isinstance(key, binary_type) else key):
                encode_for_json(value)
            for key, value in data.items()
        }
    return data


def sanitize_config(config):
"""
Sanitize static configuration by removing empty strings/collections
Expand Down Expand Up @@ -219,7 +231,7 @@ def extract_config(procmem):
else:
log.debug("{} - No luck.".format(fmt_procmem(procmem)))

log.debug("Matched rules: {}".format(matches.keys()))
log.debug("Matched rules: {}".format(list(matches.keys()))) # 'list()' for prettier logs

ripped_family = None

Expand Down Expand Up @@ -298,9 +310,11 @@ def push_config(self, config, extractor):
:param extractor: Extractor object reference
:type extractor: :class:`malduck.extractor.Extractor`
"""
config = encode_for_json(config)
try:
json.dumps(config)
except (TypeError, OverflowError):
except (TypeError, OverflowError) as e:
log.debug("Config is not JSON-encodable ({}): {}".format(str(e), repr(config)))
raise RuntimeError("Config must be JSON-encodable")

config = sanitize_config(config)
Expand Down
5 changes: 2 additions & 3 deletions malduck/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def echo_config(extract_manager, file_path=None):
else "[+] Ripped '{family}' configuration:"
) .format(family=config["family"], file_path=file_path)
click.echo(message, err=True)
click.echo(json.dumps(config, indent=4))
click.echo(json.dumps(config, indent=4, sort_keys=True))

if base is None:
base = 0
Expand All @@ -82,9 +82,8 @@ def echo_config(extract_manager, file_path=None):
else:
files = []
click.echo("[!] Symbolic links are not supported, {} ignored.".format(path), err=True)
files.sort()

for file_path in files:
for file_path in sorted(files):
extract_manager.push_file(file_path, base=base)
if not analysis:
echo_config(extract_manager, file_path)
Expand Down
12 changes: 9 additions & 3 deletions malduck/procmem/procmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ..disasm import disasm
from ..string.bin import uint8, uint16, uint32, uint64, int8, int16, int32, int64
from ..string.ops import utf16z
from ..py2compat import ensure_bytes, ensure_string, binary_type
from ..py2compat import is_binary, ensure_string, binary_type


class ProcessMemory(object):
Expand Down Expand Up @@ -572,7 +572,10 @@ def regexp(self, query, offset=0, length=None):
:rtype: Iterator[int]
"""
chunk = self.readp(offset, length)
query = ensure_bytes(query)
if not is_binary(query):
# Can't just encode the string.
# E.g. '\xf7'.encode('utf-8') would be encoded to b'\xc3\xb7' instead of b'\xf7'.
raise TypeError("Query argument must be binary type (bytes)")
for entry in re.finditer(query, chunk, re.DOTALL):
yield offset + entry.start()

Expand All @@ -593,7 +596,10 @@ def regexv(self, query, addr=None, length=None):
Method doesn't match bytes overlapping the border between regions
"""
query = ensure_bytes(query)
if not is_binary(query):
# Can't just encode the string.
# E.g. '\xf7'.encode('utf-8') would be encoded to b'\xc3\xb7' instead of b'\xf7'.
raise TypeError("Query argument must be binary type (bytes)")
for chunk_addr, chunk in self.readv_regions(addr, length, contiguous=False):
for entry in re.finditer(query, chunk, re.DOTALL):
yield chunk_addr + entry.start()
Expand Down
15 changes: 10 additions & 5 deletions malduck/py2compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,23 @@ def iterbytes(b):

def ensure_bytes(v):
    """
    Return ``v`` as a binary string, encoding text as UTF-8 when needed.

    Py2: str -> str; unicode -> str
    Py3: bytes -> bytes; str -> bytes
    """
    # Values that are already binary pass through untouched.
    if isinstance(v, binary_type):
        return v
    return v.encode("utf8")


def ensure_string(v):
    """
    Return ``v`` as the native string type, decoding binary data as UTF-8.

    Py2: str -> str; unicode -> unicode
    Py3: bytes -> str; str -> str

    :param v: Binary or text string
    :return: ``v`` decoded to text on Python 3 if it was binary, otherwise ``v``
    :raises TypeError: if ``v`` is neither a binary nor a text string
    """
    # Only Python 3 needs decoding: there the binary type is distinct from str.
    if PY3 and isinstance(v, binary_type):
        return v.decode("utf8")
    if isinstance(v, string_types):
        return v
    # Reject anything else explicitly instead of silently passing it through.
    raise TypeError('v should be str/unicode/bytes instead of ' + str(type(v)))


def import_module(importer, module_name):
Expand Down

0 comments on commit 4d165ba

Please sign in to comment.