From 2220809b716a7e5def9a2fe062974261144b06af Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Mon, 2 Aug 2021 13:36:49 -0700 Subject: [PATCH] Improve regex used in tag_known_software #238 * Update tests with more paths to test regex patterns Signed-off-by: Jono Yang --- scanpipe/pipes/rootfs.py | 3 +- scanpipe/pipes/windows.py | 39 ++++++------- scanpipe/tests/test_pipes.py | 106 +++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 20 deletions(-) diff --git a/scanpipe/pipes/rootfs.py b/scanpipe/pipes/rootfs.py index ff98ddfd4..15dfab9bb 100644 --- a/scanpipe/pipes/rootfs.py +++ b/scanpipe/pipes/rootfs.py @@ -362,7 +362,8 @@ def tag_ignorable_codebase_resources(project): def tag_data_files_with_no_clues(project): """ - Tag CodebaseResources that have a file type of `data` and no detected clues to be uninteresting. + Tag CodebaseResources that have a file type of `data` and no detected clues + to be uninteresting. """ lookup = Q( file_type="data", diff --git a/scanpipe/pipes/windows.py b/scanpipe/pipes/windows.py index 498e4a3fe..d2537461d 100644 --- a/scanpipe/pipes/windows.py +++ b/scanpipe/pipes/windows.py @@ -138,30 +138,31 @@ def tag_known_software(project): then a version number of "nv" will be set. """ qs = project.codebaseresources.no_status() - python_root_directory_name_pattern = r"(^.*Python(\d*))/.*$" + python_root_directory_name_pattern = r"(^/(Files/)?Python(\d+)?)/.*$" python_root_directory_name_pattern_compiled = re.compile( python_root_directory_name_pattern ) - python_paths_by_versions = {} - lookup = Q(rootfs_path__regex=python_root_directory_name_pattern) - for python_codebase_resource in qs.filter(lookup): - _, python_root_dir, version, _ = re.split( + python_versions_by_path = {} + for python_codebase_resource in qs.filter( + rootfs_path__regex=python_root_directory_name_pattern + ): + _, python_root_dir, _, version, _ = re.split( python_root_directory_name_pattern_compiled, python_codebase_resource.rootfs_path, ) + if python_root_dir in python_versions_by_path: + continue if not version: version = "nv" - if version in python_paths_by_versions: - continue if version != "nv": version = ".".join(digit for digit in version) - python_paths_by_versions[version] = python_root_dir + python_versions_by_path[python_root_dir] = version # We do not want to tag the files in the `site-packages` directory as being # from Python proper. The packages found here are oftentime third-party # packages from outside the Python foundation q_objects = [~Q(rootfs_path__icontains="site-packages")] - for python_version, python_path in python_paths_by_versions.items(): + for python_path, python_version in python_versions_by_path.items(): python_package = win_reg.InstalledWindowsProgram( name="Python", version=python_version, @@ -177,27 +178,27 @@ def tag_known_software(project): ) qs = project.codebaseresources.no_status() - openjdk_root_directory_name_pattern = r"(^.*/(open)?jdk(-((\d*)(\.\d+)*))*)/.*$" + openjdk_root_directory_name_pattern = ( + r"^(/(Files/)?(open)?jdk(-((\d*)(\.\d+)*))*)/.*$" + ) openjdk_root_directory_name_pattern_compiled = re.compile( openjdk_root_directory_name_pattern ) - openjdk_paths_by_versions = {} + openjdk_versions_by_path = {} for openjdk_codebase_resource in qs.filter( rootfs_path__regex=openjdk_root_directory_name_pattern ): - _, openjdk_root_path, open_prefix, _, openjdk_version, _, _, _ = re.split( + _, openjdk_root_path, _, _, _, openjdk_version, _, _, _ = re.split( openjdk_root_directory_name_pattern_compiled, openjdk_codebase_resource.rootfs_path, ) + if openjdk_root_path in openjdk_versions_by_path: + continue if not openjdk_version: openjdk_version = "nv" - if not open_prefix: - open_prefix = "" - if openjdk_version in openjdk_paths_by_versions: - continue - openjdk_paths_by_versions[openjdk_version] = openjdk_root_path + openjdk_versions_by_path[openjdk_root_path] = openjdk_version - for openjdk_version, openjdk_path in openjdk_paths_by_versions.items(): + for openjdk_path, openjdk_version in openjdk_versions_by_path.items(): openjdk_package = win_reg.InstalledWindowsProgram( name="OpenJDK", version=openjdk_version, @@ -232,7 +233,7 @@ def tag_program_files(project): program_files_one_directory_below_pattern ) program_files_dirname_by_path = {} - for program_file in qs.filter(rootfs_path__regex="^.*/Program Files( \(x86\))?"): + for program_file in qs.filter(rootfs_path__regex=r"^.*/Program Files( \(x86\))?"): _, program_files_subdir, _, dirname, _ = re.split( program_files_one_directory_below_pattern_compiled, program_file.rootfs_path ) diff --git a/scanpipe/tests/test_pipes.py b/scanpipe/tests/test_pipes.py index d3fcc729b..6567ddd77 100644 --- a/scanpipe/tests/test_pipes.py +++ b/scanpipe/tests/test_pipes.py @@ -820,18 +820,124 @@ def test_scanpipe_pipes_windows_tag_known_software(self): path="root/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", rootfs_path="/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", ) + resource6 = CodebaseResource.objects.create( + project=p1, + path="root/Files/jdk-11.0.1/readme.txt", + rootfs_path="/Files/jdk-11.0.1/readme.txt", + ) + resource7 = CodebaseResource.objects.create( + project=p1, + path="root/Files/openjdk-11.0.1/readme.txt", + rootfs_path="/Files/openjdk-11.0.1/readme.txt", + ) + resource8 = CodebaseResource.objects.create( + project=p1, + path="root/Files/jdk/readme.txt", + rootfs_path="/Files/jdk/readme.txt", + ) + resource9 = CodebaseResource.objects.create( + project=p1, + path="root/Files/openjdk/readme.txt", + rootfs_path="/Files/openjdk/readme.txt", + ) + resource10 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files/something-else/jdk/readme.txt", + rootfs_path="/Files/Program Files/something-else/jdk/readme.txt", + ) + resource11 = CodebaseResource.objects.create( + project=p1, + path="root/Python/py.exe", + rootfs_path="/Python/py.exe", + ) + resource12 = CodebaseResource.objects.create( + project=p1, + path="root/Python27/python2.exe", + rootfs_path="/Python27/python2.exe", + ) + resource13 = CodebaseResource.objects.create( + project=p1, + path="root/Python3/python3.exe", + rootfs_path="/Python3/python3.exe", + ) + resource14 = CodebaseResource.objects.create( + project=p1, + path="root/Python39/python3.9", + rootfs_path="/Python39/python3.9.exe", + ) + resource15 = CodebaseResource.objects.create( + project=p1, + path="root/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", + rootfs_path="/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", + ) + resource16 = CodebaseResource.objects.create( + project=p1, + path="root/jdk-11.0.1/readme.txt", + rootfs_path="/jdk-11.0.1/readme.txt", + ) + resource17 = CodebaseResource.objects.create( + project=p1, + path="root/openjdk-11.0.1/readme.txt", + rootfs_path="/openjdk-11.0.1/readme.txt", + ) + resource18 = CodebaseResource.objects.create( + project=p1, + path="root/jdk/readme.txt", + rootfs_path="/jdk/readme.txt", + ) + resource19 = CodebaseResource.objects.create( + project=p1, + path="root/openjdk/readme.txt", + rootfs_path="/openjdk/readme.txt", + ) + resource20 = CodebaseResource.objects.create( + project=p1, + path="root/Program Files/something-else/jdk/readme.txt", + rootfs_path="/Program Files/something-else/jdk/readme.txt", + ) windows.tag_known_software(p1) + resource11.refresh_from_db() + resource12.refresh_from_db() + resource13.refresh_from_db() + resource14.refresh_from_db() + resource15.refresh_from_db() + resource16.refresh_from_db() + resource17.refresh_from_db() + resource18.refresh_from_db() + resource19.refresh_from_db() + resource20.refresh_from_db() resource1.refresh_from_db() resource2.refresh_from_db() resource3.refresh_from_db() resource4.refresh_from_db() resource5.refresh_from_db() + resource6.refresh_from_db() + resource7.refresh_from_db() + resource8.refresh_from_db() + resource9.refresh_from_db() + resource10.refresh_from_db() + self.assertEqual("installed-package", resource1.status) self.assertEqual("installed-package", resource2.status) self.assertEqual("installed-package", resource3.status) self.assertEqual("installed-package", resource4.status) self.assertEqual("", resource5.status) + self.assertEqual("installed-package", resource6.status) + self.assertEqual("installed-package", resource7.status) + self.assertEqual("installed-package", resource8.status) + self.assertEqual("installed-package", resource9.status) + self.assertEqual("", resource10.status) + self.assertEqual("installed-package", resource11.status) + self.assertEqual("installed-package", resource12.status) + self.assertEqual("installed-package", resource13.status) + self.assertEqual("installed-package", resource14.status) + self.assertEqual("", resource15.status) + self.assertEqual("installed-package", resource16.status) + self.assertEqual("installed-package", resource17.status) + self.assertEqual("installed-package", resource18.status) + self.assertEqual("installed-package", resource19.status) + self.assertEqual("", resource20.status) def test_scanpipe_pipes_windows_tag_program_files(self): p1 = Project.objects.create(name="Analysis")