-
Notifications
You must be signed in to change notification settings - Fork 84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for Windows containers #181
Changes from 26 commits
354bf8b
624f8cd
927bd4b
ee5ea1b
fcfbe3c
7c66011
324cf3d
4f1eac7
198efed
3a49932
1b46251
c80f06b
358b9ac
a632464
431d1e4
04d45aa
9c74c4c
4636576
b216c6e
aaafc04
730e808
546296f
6baaeb0
76c9e4f
2220809
a48eb4c
c94fc15
ced8f38
53b128a
2645864
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/nexB/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/nexB/scancode.io for support and download. | ||
|
||
from scanpipe.pipelines.docker import Docker | ||
from scanpipe.pipes import docker | ||
from scanpipe.pipes import rootfs | ||
from scanpipe.pipes import windows | ||
|
||
|
||
class WindowsDocker(Docker):
    """
    Pipeline for the analysis of Windows-based Docker images.

    Extends the `Docker` pipeline with extra tagging steps backed by the
    `scanpipe.pipes.windows` and `scanpipe.pipes.rootfs` modules.
    """

    @classmethod
    def steps(cls):
        # The steps are listed in the order they are returned to the caller.
        ordered_steps = (
            cls.extract_images,
            cls.extract_layers,
            cls.find_images_os_and_distro,
            cls.collect_images_information,
            cls.collect_and_create_codebase_resources,
            cls.collect_and_create_system_packages,
            cls.tag_known_software_packages,
            cls.tag_uninteresting_codebase_resources,
            cls.tag_program_files_dirs_as_packages,
            cls.tag_empty_files,
            cls.scan_for_application_packages,
            cls.scan_for_files,
            cls.analyze_scanned_files,
            cls.tag_data_files_with_no_clues,
            cls.tag_not_analyzed_codebase_resources,
        )
        return ordered_steps

    def tag_known_software_packages(self):
        """
        Flag the files that belong to well-known software packages, detected
        from their common install paths.
        """
        windows.tag_known_software(self.project)

    def tag_uninteresting_codebase_resources(self):
        """
        Flag the files that are known to be uninteresting: whiteouts,
        Windows-specific uninteresting files, default ignorable paths and
        media files.
        """
        docker.tag_whiteout_codebase_resources(self.project)
        windows.tag_uninteresting_windows_codebase_resources(self.project)
        rootfs.tag_ignorable_codebase_resources(self.project)
        rootfs.tag_media_files_as_uninteresting(self.project)

    def tag_program_files_dirs_as_packages(self):
        """
        Report each immediate subdirectory of `Program Files` and of
        `Program Files (x86)` as a package.
        """
        windows.tag_program_files(self.project)

    def tag_data_files_with_no_clues(self):
        """
        Mark as uninteresting the data files that carry no clue about their
        origin.
        """
        rootfs.tag_data_files_with_no_clues(self.project)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/nexB/scancode.io for support and download. | ||
|
||
import fnmatch | ||
import logging | ||
import os | ||
from functools import partial | ||
|
@@ -28,12 +29,14 @@ | |
from django.db.models import Q | ||
|
||
import attr | ||
from commoncode.ignore import default_ignores | ||
from container_inspector.distro import Distro | ||
|
||
from scanpipe import pipes | ||
from scanpipe.pipes import alpine | ||
from scanpipe.pipes import debian | ||
from scanpipe.pipes import rpm | ||
from scanpipe.pipes import windows | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -48,6 +51,7 @@ | |
"opensuse": rpm.package_getter, | ||
"opensuse-tumbleweed": rpm.package_getter, | ||
"photon": rpm.package_getter, | ||
"windows": windows.package_getter, | ||
} | ||
|
||
|
||
|
@@ -188,7 +192,7 @@ def has_hash_diff(install_file, codebase_resource): | |
|
||
def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True): | ||
""" | ||
Given a `project` Project and an `rootfs` RootFs, scan the `rootfs` for | ||
Given a `project` Project and a `rootfs` RootFs, scan the `rootfs` for | ||
installed system packages, and create a DiscoveredPackage for each. | ||
|
||
Then for each installed DiscoveredPackage file, check if it exists | ||
|
@@ -336,3 +340,94 @@ def tag_uninteresting_codebase_resources(project): | |
|
||
qs = project.codebaseresources.no_status() | ||
qs.filter(lookups).update(status="ignored-not-interesting") | ||
|
||
|
||
def tag_ignorable_codebase_resources(project):
    """
    Tag the codebase resources of `project` whose `rootfs_path` matches one of
    the ignorable files/directories glob patterns from commoncode.ignore.

    Only the resources returned by `project.codebaseresources.no_status()` are
    considered; matching ones get the "ignored-default-ignores" status.
    """
    ignorable = Q()
    for glob_pattern in default_ignores.keys():
        # Convert the glob pattern into its regex equivalent.
        regex = fnmatch.translate(glob_pattern)
        # PostgreSQL does not accept some parts of the Python regex syntax:
        # rewrite the leading "(?s" group that `fnmatch.translate` may emit.
        if regex.startswith("(?s"):
            regex = regex.replace("(?s", "(?")
        # A path is ignorable when it contains the raw pattern text or when it
        # matches the translated regex, in both cases ignoring the case.
        ignorable = (
            ignorable
            | Q(rootfs_path__icontains=glob_pattern)
            | Q(rootfs_path__iregex=regex)
        )

    project.codebaseresources.no_status().filter(ignorable).update(
        status="ignored-default-ignores"
    )
|
||
|
||
def tag_data_files_with_no_clues(project):
    """
    Set the "ignored-data-file-no-clues" status on the CodebaseResources of
    `project` that have a `data` file type and no detected clues: no
    copyrights, holders, authors, licenses, license expressions, emails or
    URLs.
    """
    clue_fields = (
        "copyrights",
        "holders",
        "authors",
        "licenses",
        "license_expressions",
        "emails",
        "urls",
    )
    # Each clue field must be an empty list for the resource to qualify.
    without_clues = {field_name: [] for field_name in clue_fields}
    data_without_clues = Q(file_type="data", **without_clues)

    # NOTE(review): the queryset is not narrowed with `no_status()` here, so a
    # status already set on a matching resource is replaced -- confirm this is
    # intended.
    project.codebaseresources.filter(data_without_clues).update(
        status="ignored-data-file-no-clues"
    )
|
||
|
||
def tag_media_files_as_uninteresting(project):
    """
    Tag the CodebaseResources of `project` that are media files as
    uninteresting by setting their status to "ignored-media-file".

    A resource is treated as a media file when its `mime_type` contains one of
    the `mimes` entries or its `file_type` contains one of the `types`
    entries, ignoring the case. Only the resources returned by
    `project.codebaseresources.no_status()` are considered.

    `mimes` and `types` are taken from TypeCode:
    https://github.com/nexB/typecode/blob/main/src/typecode/contenttype.py#L528
    """
    # NOTE(review): reviewers discussed sourcing these values from TypeCode
    # rather than keeping a copy here -- confirm whether that is now possible.
    mimes = (
        "image",
        "picture",
        "audio",
        "video",
        "graphic",
        "sound",
    )

    # Each entry is listed once: a repeated entry would only add a redundant
    # OR clause to the lookup without changing which resources match.
    types = (
        "image data",
        "graphics image",
        "ms-windows metafont .wmf",
        "windows enhanced metafile",
        "png image",
        "interleaved image",
        "microsoft asf",
        "image text",
        "photoshop image",
        "shop pro image",
        "ogg data",
        "vorbis",
        "mpeg",
        "theora",
        "bitmap",
        "audio",
        "video",
        "sound",
        "riff",
        "icon",
        "pc bitmap",
        "netpbm",
    )

    # OR together one case-insensitive "contains" lookup per known value.
    lookup = Q()
    for mime_type in mimes:
        lookup |= Q(mime_type__icontains=mime_type)
    for file_type in types:
        lookup |= Q(file_type__icontains=file_type)

    qs = project.codebaseresources.no_status()
    qs.filter(lookup).update(status="ignored-media-file")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about
DockerWindows
and docker_windows
instead? This would keep all Docker-based pipelines grouped in the UI. I'm not sure about this though; what's your take?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it makes sense to rename the pipeline to
DockerWindows
for grouping.