Skip to content

Commit

Permalink
Improve regex used in tag_known_software #238
Browse files Browse the repository at this point in the history
    * Update tests with more paths to test regex patterns

Signed-off-by: Jono Yang <jyang@nexb.com>
  • Loading branch information
JonoYang committed Aug 2, 2021
1 parent 76c9e4f commit 2220809
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 20 deletions.
3 changes: 2 additions & 1 deletion scanpipe/pipes/rootfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,8 @@ def tag_ignorable_codebase_resources(project):

def tag_data_files_with_no_clues(project):
"""
Tag CodebaseResources that have a file type of `data` and no detected clues to be uninteresting.
Tag CodebaseResources that have a file type of `data` and no detected clues
to be uninteresting.
"""
lookup = Q(
file_type="data",
Expand Down
39 changes: 20 additions & 19 deletions scanpipe/pipes/windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,30 +138,31 @@ def tag_known_software(project):
then a version number of "nv" will be set.
"""
qs = project.codebaseresources.no_status()
python_root_directory_name_pattern = r"(^.*Python(\d*))/.*$"
python_root_directory_name_pattern = r"(^/(Files/)?Python(\d+)?)/.*$"
python_root_directory_name_pattern_compiled = re.compile(
python_root_directory_name_pattern
)
python_paths_by_versions = {}
lookup = Q(rootfs_path__regex=python_root_directory_name_pattern)
for python_codebase_resource in qs.filter(lookup):
_, python_root_dir, version, _ = re.split(
python_versions_by_path = {}
for python_codebase_resource in qs.filter(
rootfs_path__regex=python_root_directory_name_pattern
):
_, python_root_dir, _, version, _ = re.split(
python_root_directory_name_pattern_compiled,
python_codebase_resource.rootfs_path,
)
if python_root_dir in python_versions_by_path:
continue
if not version:
version = "nv"
if version in python_paths_by_versions:
continue
if version != "nv":
version = ".".join(digit for digit in version)
python_paths_by_versions[version] = python_root_dir
python_versions_by_path[python_root_dir] = version

# We do not want to tag the files in the `site-packages` directory as being
# from Python proper. The packages found here are often third-party
# packages from outside the Python foundation.
q_objects = [~Q(rootfs_path__icontains="site-packages")]
for python_version, python_path in python_paths_by_versions.items():
for python_path, python_version in python_versions_by_path.items():
python_package = win_reg.InstalledWindowsProgram(
name="Python",
version=python_version,
Expand All @@ -177,27 +178,27 @@ def tag_known_software(project):
)

qs = project.codebaseresources.no_status()
openjdk_root_directory_name_pattern = r"(^.*/(open)?jdk(-((\d*)(\.\d+)*))*)/.*$"
openjdk_root_directory_name_pattern = (
r"^(/(Files/)?(open)?jdk(-((\d*)(\.\d+)*))*)/.*$"
)
openjdk_root_directory_name_pattern_compiled = re.compile(
openjdk_root_directory_name_pattern
)
openjdk_paths_by_versions = {}
openjdk_versions_by_path = {}
for openjdk_codebase_resource in qs.filter(
rootfs_path__regex=openjdk_root_directory_name_pattern
):
_, openjdk_root_path, open_prefix, _, openjdk_version, _, _, _ = re.split(
_, openjdk_root_path, _, _, _, openjdk_version, _, _, _ = re.split(
openjdk_root_directory_name_pattern_compiled,
openjdk_codebase_resource.rootfs_path,
)
if openjdk_root_path in openjdk_versions_by_path:
continue
if not openjdk_version:
openjdk_version = "nv"
if not open_prefix:
open_prefix = ""
if openjdk_version in openjdk_paths_by_versions:
continue
openjdk_paths_by_versions[openjdk_version] = openjdk_root_path
openjdk_versions_by_path[openjdk_root_path] = openjdk_version

for openjdk_version, openjdk_path in openjdk_paths_by_versions.items():
for openjdk_path, openjdk_version in openjdk_versions_by_path.items():
openjdk_package = win_reg.InstalledWindowsProgram(
name="OpenJDK",
version=openjdk_version,
Expand Down Expand Up @@ -232,7 +233,7 @@ def tag_program_files(project):
program_files_one_directory_below_pattern
)
program_files_dirname_by_path = {}
for program_file in qs.filter(rootfs_path__regex="^.*/Program Files( \(x86\))?"):
for program_file in qs.filter(rootfs_path__regex=r"^.*/Program Files( \(x86\))?"):
_, program_files_subdir, _, dirname, _ = re.split(
program_files_one_directory_below_pattern_compiled, program_file.rootfs_path
)
Expand Down
106 changes: 106 additions & 0 deletions scanpipe/tests/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,18 +820,124 @@ def test_scanpipe_pipes_windows_tag_known_software(self):
path="root/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL",
rootfs_path="/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL",
)
resource6 = CodebaseResource.objects.create(
project=p1,
path="root/Files/jdk-11.0.1/readme.txt",
rootfs_path="/Files/jdk-11.0.1/readme.txt",
)
resource7 = CodebaseResource.objects.create(
project=p1,
path="root/Files/openjdk-11.0.1/readme.txt",
rootfs_path="/Files/openjdk-11.0.1/readme.txt",
)
resource8 = CodebaseResource.objects.create(
project=p1,
path="root/Files/jdk/readme.txt",
rootfs_path="/Files/jdk/readme.txt",
)
resource9 = CodebaseResource.objects.create(
project=p1,
path="root/Files/openjdk/readme.txt",
rootfs_path="/Files/openjdk/readme.txt",
)
resource10 = CodebaseResource.objects.create(
project=p1,
path="root/Files/Program Files/something-else/jdk/readme.txt",
rootfs_path="/Files/Program Files/something-else/jdk/readme.txt",
)
resource11 = CodebaseResource.objects.create(
project=p1,
path="root/Python/py.exe",
rootfs_path="/Python/py.exe",
)
resource12 = CodebaseResource.objects.create(
project=p1,
path="root/Python27/python2.exe",
rootfs_path="/Python27/python2.exe",
)
resource13 = CodebaseResource.objects.create(
project=p1,
path="root/Python3/python3.exe",
rootfs_path="/Python3/python3.exe",
)
resource14 = CodebaseResource.objects.create(
project=p1,
path="root/Python39/python3.9",
rootfs_path="/Python39/python3.9.exe",
)
resource15 = CodebaseResource.objects.create(
project=p1,
path="root/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL",
rootfs_path="/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL",
)
resource16 = CodebaseResource.objects.create(
project=p1,
path="root/jdk-11.0.1/readme.txt",
rootfs_path="/jdk-11.0.1/readme.txt",
)
resource17 = CodebaseResource.objects.create(
project=p1,
path="root/openjdk-11.0.1/readme.txt",
rootfs_path="/openjdk-11.0.1/readme.txt",
)
resource18 = CodebaseResource.objects.create(
project=p1,
path="root/jdk/readme.txt",
rootfs_path="/jdk/readme.txt",
)
resource19 = CodebaseResource.objects.create(
project=p1,
path="root/openjdk/readme.txt",
rootfs_path="/openjdk/readme.txt",
)
resource20 = CodebaseResource.objects.create(
project=p1,
path="root/Program Files/something-else/jdk/readme.txt",
rootfs_path="/Program Files/something-else/jdk/readme.txt",
)

windows.tag_known_software(p1)
resource11.refresh_from_db()
resource12.refresh_from_db()
resource13.refresh_from_db()
resource14.refresh_from_db()
resource15.refresh_from_db()
resource16.refresh_from_db()
resource17.refresh_from_db()
resource18.refresh_from_db()
resource19.refresh_from_db()
resource20.refresh_from_db()
resource1.refresh_from_db()
resource2.refresh_from_db()
resource3.refresh_from_db()
resource4.refresh_from_db()
resource5.refresh_from_db()
resource6.refresh_from_db()
resource7.refresh_from_db()
resource8.refresh_from_db()
resource9.refresh_from_db()
resource10.refresh_from_db()

self.assertEqual("installed-package", resource1.status)
self.assertEqual("installed-package", resource2.status)
self.assertEqual("installed-package", resource3.status)
self.assertEqual("installed-package", resource4.status)
self.assertEqual("", resource5.status)
self.assertEqual("installed-package", resource6.status)
self.assertEqual("installed-package", resource7.status)
self.assertEqual("installed-package", resource8.status)
self.assertEqual("installed-package", resource9.status)
self.assertEqual("", resource10.status)
self.assertEqual("installed-package", resource11.status)
self.assertEqual("installed-package", resource12.status)
self.assertEqual("installed-package", resource13.status)
self.assertEqual("installed-package", resource14.status)
self.assertEqual("", resource15.status)
self.assertEqual("installed-package", resource16.status)
self.assertEqual("installed-package", resource17.status)
self.assertEqual("installed-package", resource18.status)
self.assertEqual("installed-package", resource19.status)
self.assertEqual("", resource20.status)

def test_scanpipe_pipes_windows_tag_program_files(self):
p1 = Project.objects.create(name="Analysis")
Expand Down

0 comments on commit 2220809

Please sign in to comment.