From 81667073d7f283f74932dbb489ed9de6e8b62337 Mon Sep 17 00:00:00 2001 From: Nicolas Nobelis Date: Thu, 30 Mar 2023 10:36:31 +0200 Subject: [PATCH] feat(model): Extends the model to capture Snippets from snippet scanners Snippet scanners such as ScanOSS [1] and FossID [2] can identify code snippets potentially coming from a third-party source. To do so, they scan the Internet for source code and build a Knowledge Base (KB). Then, the source code to check for snippets is scanned and compared against this KB. Snippet findings are neither license nor copyright findings, as a human operator needs to review them and either accept them or flag them as false positives. Therefore, this commit adds a new property to the ORT data model in `ScanSummary` to carry these snippet findings. This model has been created by comparing the results from FossID and ScanOSS and trying to find a common abstraction. This is currently the minimal model required to handle snippets. Further properties such as matched lines (present in both results) will be added in the future. Blackduck [3] is another scanner considered for integration in ORT [4] which supports snippets. However, since it does not deliver snippets through its API, it was not considered when designing the snippet data model for ORT. Fixes: #3265. 
[1]: https://www.scanoss.com/ [2]: https://fossid.com/ [3]: https://www.synopsys.com/software-integrity/security-testing/software-composition-analysis.html [4]: https://github.com/oss-review-toolkit/ort/issues/4632 Signed-off-by: Nicolas Nobelis --- model/src/main/kotlin/ScanSummary.kt | 10 ++- model/src/main/kotlin/utils/SnippetFinding.kt | 63 +++++++++++++++++++ ...my-expected-output-for-analyzer-result.yml | 5 ++ .../src/main/kotlin/scanners/fossid/FossId.kt | 2 +- 4 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 model/src/main/kotlin/utils/SnippetFinding.kt diff --git a/model/src/main/kotlin/ScanSummary.kt b/model/src/main/kotlin/ScanSummary.kt index 9ab9bf4edb37c..783fab780de77 100644 --- a/model/src/main/kotlin/ScanSummary.kt +++ b/model/src/main/kotlin/ScanSummary.kt @@ -29,6 +29,7 @@ import java.util.SortedSet import org.ossreviewtoolkit.model.config.LicenseFilePatterns import org.ossreviewtoolkit.model.utils.RootLicenseMatcher +import org.ossreviewtoolkit.model.utils.SnippetFinding import org.ossreviewtoolkit.utils.common.FileMatcher import org.ossreviewtoolkit.utils.spdx.SpdxExpression @@ -66,6 +67,12 @@ data class ScanSummary( @JsonProperty("copyrights") val copyrightFindings: SortedSet, + /** + * The detected snippet findings, aggregated per file of the source code repository being scanned. + */ + @JsonProperty("snippets") + val snippetFindings: Map> = emptyMap(), + /** * The list of issues that occurred during the scan. This property is not serialized if the list is empty to reduce * the size of the result file. 
If there are no issues at all, [ScannerRun.hasIssues] already contains that @@ -84,7 +91,8 @@ data class ScanSummary( endTime = Instant.EPOCH, packageVerificationCode = "", licenseFindings = sortedSetOf(), - copyrightFindings = sortedSetOf() + copyrightFindings = sortedSetOf(), + snippetFindings = emptyMap() ) } diff --git a/model/src/main/kotlin/utils/SnippetFinding.kt b/model/src/main/kotlin/utils/SnippetFinding.kt new file mode 100644 index 0000000000000..53f81d1801331 --- /dev/null +++ b/model/src/main/kotlin/utils/SnippetFinding.kt @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2023 The ORT Project Authors (see ) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + * License-Filename: LICENSE + */ + +package org.ossreviewtoolkit.model.utils + +import org.ossreviewtoolkit.utils.spdx.SpdxExpression + +/** + * A class representing a snippet finding. A snippet finding is a code snippet of [artifact] and [version], with the + * following [licenses], matching the code being scanned. + * It is meant to be reviewed by an operator as it could be a false positive. + */ +data class SnippetFinding( + /** + * The author/vendor of the component the code snippet comes from. + */ + val author: String, + /** + * The artifact name of the component the code snippet comes from. + */ + val artifact: String, + /** + * The version of the component the code snippet comes from. 
+ */ + val version: String, + /** + * The license(s) of the component the code snippet comes from. + */ + val licenses: Set, + /** + * The matching score between the code being scanned and the code snippet. This is scanner specific (e.g. for + * ScanOSS this is a percentage). + */ + val score: Float +) : Comparable { + companion object { + private val COMPARATOR = + compareBy( + { it.author }, + { it.artifact }, + { it.version }, + { it.licenses.toString() } + ).thenByDescending { it.score } + } + + override fun compareTo(other: SnippetFinding) = COMPARATOR.compare(this, other) +} diff --git a/scanner/src/funTest/assets/dummy-expected-output-for-analyzer-result.yml b/scanner/src/funTest/assets/dummy-expected-output-for-analyzer-result.yml index 0b152bcb9744b..96fe24e0bae4f 100644 --- a/scanner/src/funTest/assets/dummy-expected-output-for-analyzer-result.yml +++ b/scanner/src/funTest/assets/dummy-expected-output-for-analyzer-result.yml @@ -237,6 +237,7 @@ scanner: package_verification_code: "" licenses: [] copyrights: [] + snippets: {} issues: - timestamp: "1970-01-01T00:00:00Z" source: "scanner" @@ -282,6 +283,7 @@ scanner: start_line: -1 end_line: -1 copyrights: [] + snippets: {} Maven:org.apache.commons:commons-lang3:3.5: - provenance: source_artifact: @@ -309,6 +311,7 @@ scanner: start_line: -1 end_line: -1 copyrights: [] + snippets: {} Maven:org.apache.commons:commons-text:1.1: - provenance: source_artifact: @@ -336,6 +339,7 @@ scanner: start_line: -1 end_line: -1 copyrights: [] + snippets: {} Maven:org.hamcrest:hamcrest-core:1.3: - provenance: source_artifact: @@ -363,6 +367,7 @@ scanner: start_line: -1 end_line: -1 copyrights: [] + snippets: {} storage_stats: num_reads: 0 num_hits: 0 diff --git a/scanner/src/main/kotlin/scanners/fossid/FossId.kt b/scanner/src/main/kotlin/scanners/fossid/FossId.kt index f7f94c1a05812..dfcca886d596e 100644 --- a/scanner/src/main/kotlin/scanners/fossid/FossId.kt +++ b/scanner/src/main/kotlin/scanners/fossid/FossId.kt @@ 
-214,7 +214,7 @@ class FossId internal constructor( endTime: Instant = Instant.now(), issue: Issue ) = ScanSummary( - startTime, endTime, "", sortedSetOf(), sortedSetOf(), listOf(issue) + startTime, endTime, "", sortedSetOf(), sortedSetOf(), issues = listOf(issue) ) override fun scanPackage(pkg: Package, context: ScanContext): ScanResult {