Skip to content

Commit

Permalink
feat(model): Extend the model to capture Snippets from snippet scanners
Browse files Browse the repository at this point in the history
Snippet scanners such as ScanOSS [1] and FossID [2] can identify code
snippets potentially coming from a third party source. To do so, they
scan the Internet for source code and build a Knowledge Base (KB). Then,
the source code to check for snippets is scanned and compared against
this KB.

Snippet Findings are difference in nature from License and Copyright
findings as they reference a third party sourcecode.

Therefore, this commit adds a new property ORT data model in the
`ScanSummary` to carry these snippet findings. This model has been
created by comparing the results from FossID and ScanOSS and trying to
find a common abstraction. This is currently the minimal model required
to handle snippets. Further properties will be added in the future.

Blackduck [3] is another scanner considered for integration in ORT [4]
which supports snippets. However since it does not deliver snippets
through its API, it was not considered when designing the snippet data
model for ORT.

Fixes: oss-review-toolkit#3265.

[1]: https://www.scanoss.com/
[2]: https://fossid.com/
[3]: https://www.synopsys.com/software-integrity/security-testing/software-composition-analysis.html
[4]: oss-review-toolkit#4632

Signed-off-by: Nicolas Nobelis <nicolas.nobelis@bosch.io>
  • Loading branch information
nnobelis committed May 11, 2023
1 parent eb676c4 commit 2216f14
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 2 deletions.
13 changes: 12 additions & 1 deletion model/src/main/kotlin/ScanSummary.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@ import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonIgnoreProperties
import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.annotation.JsonSerialize

import java.time.Instant
import java.util.SortedSet

import org.ossreviewtoolkit.model.config.LicenseFilePatterns
import org.ossreviewtoolkit.model.utils.RootLicenseMatcher
import org.ossreviewtoolkit.model.utils.SnippetFinding
import org.ossreviewtoolkit.model.utils.SnippetFindingSortedSetConverter
import org.ossreviewtoolkit.utils.common.FileMatcher
import org.ossreviewtoolkit.utils.spdx.SpdxExpression

Expand Down Expand Up @@ -66,6 +69,13 @@ data class ScanSummary(
@JsonProperty("copyrights")
val copyrightFindings: SortedSet<CopyrightFinding>,

/**
* The detected snippet findings.
*/
@JsonProperty("snippets")
@JsonSerialize(converter = SnippetFindingSortedSetConverter::class)
val snippetFindings: Set<SnippetFinding> = setOf(),

/**
* The list of issues that occurred during the scan. This property is not serialized if the list is empty to reduce
* the size of the result file. If there are no issues at all, [ScannerRun.hasIssues] already contains that
Expand All @@ -84,7 +94,8 @@ data class ScanSummary(
endTime = Instant.EPOCH,
packageVerificationCode = "",
licenseFindings = sortedSetOf(),
copyrightFindings = sortedSetOf()
copyrightFindings = sortedSetOf(),
snippetFindings = sortedSetOf()
)
}

Expand Down
53 changes: 53 additions & 0 deletions model/src/main/kotlin/utils/Snippet.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (C) 2023 The ORT Project Authors (See <https://github.com/oss-review-toolkit/ort-server/blob/main/NOTICE>)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package org.ossreviewtoolkit.model.utils

import org.ossreviewtoolkit.model.Provenance
import org.ossreviewtoolkit.model.TextLocation
import org.ossreviewtoolkit.utils.spdx.SpdxExpression

data class Snippet(
/**
* The matching score between the code being scanned and the code snippet. This is scanner specific (e.g. for
* ScanOSS this is a percentage).
*/
val score: Float,

/**
* The text location in the snippet that has matched.
*/
val location: TextLocation,

/**
* The provenance of the snippet, either an artifact or a repository.
*/
val provenance: Provenance,

/**
* The purl representing the author/vendor, artifact, version of the code snippet. If the snippet scanner does not
* natively support purls, it will be generated by ORT.
*/
val purl: String,

/**
* The license of the component the code snippet is commit from.
*/
val licenses: SpdxExpression
)
39 changes: 39 additions & 0 deletions model/src/main/kotlin/utils/SnippetFinding.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (C) 2023 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package org.ossreviewtoolkit.model.utils

import org.ossreviewtoolkit.model.TextLocation

/**
* A class representing a snippet finding for a source file. A snippet finding is a code snippet from another origin,
* matching the code being scanned.
* It is meant to be reviewed by an operator as it could be a false positive.
*/
data class SnippetFinding(
/**
* The text location in the scanned source file where the snippet has matched.
*/
val sourceLocation: TextLocation,

/**
* The corresponding snippet.
*/
val snippet: Snippet
)
5 changes: 5 additions & 0 deletions model/src/main/kotlin/utils/SortedSetConverters.kt
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,9 @@ class ProjectSortedSetConverter : StdConverter<Set<Project>, SortedSet<Project>>
override fun convert(value: Set<Project>) = value.toSortedSet(compareBy { it.id })
}

class SnippetFindingSortedSetConverter : StdConverter<Set<SnippetFinding>, SortedSet<SnippetFinding>>() {
override fun convert(value: Set<SnippetFinding>) =
value.toSortedSet(compareBy<SnippetFinding> { it.sourceLocation.path }.thenByDescending { it.snippet.purl })
}

// TODO: Add more converters to get rid of Comparable implementations that just serve sorted output.
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ scanner:
package_verification_code: ""
licenses: []
copyrights: []
snippets: []
issues:
- timestamp: "1970-01-01T00:00:00Z"
source: "scanner"
Expand Down Expand Up @@ -281,6 +282,7 @@ scanner:
start_line: -1
end_line: -1
copyrights: []
snippets: []
Maven:org.apache.commons:commons-lang3:3.5:
- provenance:
source_artifact:
Expand Down Expand Up @@ -308,6 +310,7 @@ scanner:
start_line: -1
end_line: -1
copyrights: []
snippets: []
Maven:org.apache.commons:commons-text:1.1:
- provenance:
source_artifact:
Expand Down Expand Up @@ -335,6 +338,7 @@ scanner:
start_line: -1
end_line: -1
copyrights: []
snippets: []
Maven:org.hamcrest:hamcrest-core:1.3:
- provenance:
source_artifact:
Expand Down Expand Up @@ -362,6 +366,7 @@ scanner:
start_line: -1
end_line: -1
copyrights: []
snippets: []
storage_stats:
num_reads: 0
num_hits: 0
Expand Down
24 changes: 24 additions & 0 deletions scanner/src/main/kotlin/provenance/NestedProvenanceScanResult.kt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import org.ossreviewtoolkit.model.OrtResult
import org.ossreviewtoolkit.model.RepositoryProvenance
import org.ossreviewtoolkit.model.ScanResult
import org.ossreviewtoolkit.model.ScanSummary
import org.ossreviewtoolkit.model.utils.SnippetFinding

/**
* A class that contains all [ScanResult]s for a [NestedProvenance].
Expand Down Expand Up @@ -87,6 +88,7 @@ data class NestedProvenanceScanResult(

val licenseFindings = scanResultsForScanner.mergeLicenseFindings()
val copyrightFindings = scanResultsForScanner.mergeCopyrightFindings()
val snippetFindings = scanResultsForScanner.mergeSnippetFindings()

ScanResult(
provenance = nestedProvenance.root,
Expand All @@ -97,6 +99,7 @@ data class NestedProvenanceScanResult(
packageVerificationCode = "",
licenseFindings = licenseFindings,
copyrightFindings = copyrightFindings,
snippetFindings = snippetFindings,
issues = issues
),
additionalData = allScanResults.map { it.additionalData }.reduce { acc, map -> acc + map }
Expand Down Expand Up @@ -130,6 +133,27 @@ data class NestedProvenanceScanResult(
return findings
}

private fun Map<KnownProvenance, List<ScanResult>>.mergeSnippetFindings(): Set<SnippetFinding> {
val findingsByPath = mapKeys { getPath(it.key) }.mapValues { (_, scanResults) ->
mutableSetOf<SnippetFinding>().also { acc ->
scanResults.forEach { acc += it.summary.snippetFindings }
}
}

val allFindings = mutableSetOf<SnippetFinding>()

findingsByPath.map { (path, findings) ->
val prefix = if (path.isEmpty()) path else "$path/"

allFindings += findings.map { finding ->
val newPath = "$prefix${finding.sourceLocation.path}"
finding.copy(sourceLocation = finding.sourceLocation.copy(path = newPath))
}.toSet()
}

return allFindings
}

private fun getPath(provenance: KnownProvenance) = nestedProvenance.getPath(provenance)

fun filterByIgnorePatterns(ignorePatterns: List<String>): NestedProvenanceScanResult =
Expand Down
2 changes: 1 addition & 1 deletion scanner/src/main/kotlin/scanners/fossid/FossId.kt
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ class FossId internal constructor(
endTime: Instant = Instant.now(),
issue: Issue
) = ScanSummary(
startTime, endTime, "", sortedSetOf(), sortedSetOf(), listOf(issue)
startTime, endTime, "", sortedSetOf(), sortedSetOf(), issues = listOf(issue)
)

override fun scanPackage(pkg: Package, context: ScanContext): ScanResult {
Expand Down

0 comments on commit 2216f14

Please sign in to comment.