Skip to content

Commit

Permalink
feat(downloader): Implement a basic dry-run option
Browse files Browse the repository at this point in the history
This option can be used to quickly check whether the packages from an
analyzer result can be downloaded without actually running the scanner /
downloader. As such, the option can also be used to verify curations more
quickly after (re-)applying them to the analyzer result. For the latter,
a proper solution still needs to be implemented, see [1].

Note that the implementation is not complete yet. E.g. not all cases
where a real download would succeed can be verified, as guessing
revisions while keeping downloads to a minimum is difficult to
implement for a dry run.

[1]: #6188

Signed-off-by: Sebastian Schuberth <sschuberth@gmail.com>
  • Loading branch information
sschuberth committed May 17, 2023
1 parent 3e15f98 commit 4b8f55d
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 14 deletions.
24 changes: 17 additions & 7 deletions cli/src/main/kotlin/commands/DownloaderCommand.kt
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ class DownloaderCommand : OrtCommand(
help = "Do not download excluded projects or packages. Works only with the '--ort-file' parameter."
).flag()

// When set, the command only verifies source code locations: the flag is passed on to the downloader and
// archiving of the results is skipped.
private val dryRun by option(
"--dry-run",
help = "Do not actually download anything but just verify that all source code locations are valid."
).flag()

override fun run() {
val failureMessages = mutableListOf<String>()

Expand All @@ -204,7 +209,8 @@ class DownloaderCommand : OrtCommand(
}
}

println("The download took $duration.")
val verb = if (dryRun) "verification" else "download"
println("The $verb took $duration.")

if (failureMessages.isNotEmpty()) {
logger.error {
Expand Down Expand Up @@ -289,7 +295,7 @@ class DownloaderCommand : OrtCommand(

runBlocking { downloadAllPackages(packageDownloadDirs, failureMessages) }

if (archiveMode == ArchiveMode.BUNDLE) {
if (archiveMode == ArchiveMode.BUNDLE && !dryRun) {
val zipFile = outputDir.resolve("archive.zip")

logger.info { "Archiving directory '$outputDir' to '$zipFile'." }
Expand All @@ -313,9 +319,12 @@ class DownloaderCommand : OrtCommand(
packageDownloadDirs.entries.mapIndexed { index, (pkg, dir) ->
async {
val progress = "${index + 1} of ${packageDownloadDirs.size}"
println("Starting download for ${pkg.id.toCoordinates()} ($progress).")

val verb = if (dryRun) "Verifying" else "Starting"
println("$verb download for '${pkg.id.toCoordinates()}' ($progress).")

downloadPackage(pkg, dir, failureMessages).also {
println("Finished download for ${pkg.id.toCoordinates()} ($progress).")
if (!dryRun) println("Finished download for ${pkg.id.toCoordinates()} ($progress).")
}
}
}.awaitAll()
Expand All @@ -324,9 +333,9 @@ class DownloaderCommand : OrtCommand(

private fun downloadPackage(pkg: Package, dir: File, failureMessages: MutableList<String>) {
try {
Downloader(ortConfig.downloader).download(pkg, dir)
Downloader(ortConfig.downloader).download(pkg, dir, dryRun)

if (archiveMode == ArchiveMode.ENTITY) {
if (archiveMode == ArchiveMode.ENTITY && !dryRun) {
val zipFile = outputDir.resolve("${pkg.id.toPath("-")}.zip")

logger.info { "Archiving directory '$dir' to '$zipFile'." }
Expand Down Expand Up @@ -406,7 +415,8 @@ class DownloaderCommand : OrtCommand(
// convenience as often the latest revision (referred to by some VCS-specific symbolic name) of a
// project needs to be downloaded.
val config = ortConfig.downloader.copy(allowMovingRevisions = true)
val provenance = Downloader(config).download(dummyPackage, outputDir)

val provenance = Downloader(config).download(dummyPackage, outputDir, dryRun)
println("Successfully downloaded $provenance.")
}.onFailure {
it.showStackTrace()
Expand Down
52 changes: 45 additions & 7 deletions downloader/src/main/kotlin/Downloader.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package org.ossreviewtoolkit.downloader

import java.io.File
import java.io.IOException
import java.net.HttpURLConnection
import java.net.URI

import kotlin.time.TimeSource
Expand All @@ -47,6 +48,7 @@ import org.ossreviewtoolkit.utils.common.unpackTryAllTypes
import org.ossreviewtoolkit.utils.ort.createOrtTempDir
import org.ossreviewtoolkit.utils.ort.downloadFile
import org.ossreviewtoolkit.utils.ort.okHttpClient
import org.ossreviewtoolkit.utils.ort.ping

/**
* The class to download source code. The signatures of public functions in this class define the library API.
Expand All @@ -66,7 +68,7 @@ class Downloader(private val config: DownloaderConfiguration) {
* Download the source code of the [package][pkg] to the [outputDirectory]. A [Provenance] is returned on success or
* a [DownloadException] is thrown in case of failure.
*/
fun download(pkg: Package, outputDirectory: File): Provenance {
fun download(pkg: Package, outputDirectory: File, dryRun: Boolean = false): Provenance {
verifyOutputDirectory(outputDirectory)

if (pkg.isMetadataOnly) return UnknownProvenance
Expand All @@ -75,8 +77,8 @@ class Downloader(private val config: DownloaderConfiguration) {

config.sourceCodeOrigins.forEach { origin ->
val provenance = when (origin) {
SourceCodeOrigin.VCS -> handleVcsDownload(pkg, outputDirectory, exception)
SourceCodeOrigin.ARTIFACT -> handleSourceArtifactDownload(pkg, outputDirectory, exception)
SourceCodeOrigin.VCS -> handleVcsDownload(pkg, outputDirectory, dryRun, exception)
SourceCodeOrigin.ARTIFACT -> handleSourceArtifactDownload(pkg, outputDirectory, dryRun, exception)
}

if (provenance != null) return provenance
Expand All @@ -92,6 +94,7 @@ class Downloader(private val config: DownloaderConfiguration) {
private fun handleVcsDownload(
pkg: Package,
outputDirectory: File,
dryRun: Boolean,
exception: DownloadException
): Provenance? {
val vcsMark = TimeSource.Monotonic.markNow()
Expand All @@ -102,7 +105,7 @@ class Downloader(private val config: DownloaderConfiguration) {
val isCargoPackageWithSourceArtifact = pkg.id.type == "Cargo" && pkg.sourceArtifact != RemoteArtifact.EMPTY

if (!isCargoPackageWithSourceArtifact) {
val result = downloadFromVcs(pkg, outputDirectory)
val result = downloadFromVcs(pkg, outputDirectory, dryRun = dryRun)
val vcsInfo = (result as RepositoryProvenance).vcsInfo

logger.info {
Expand Down Expand Up @@ -137,12 +140,13 @@ class Downloader(private val config: DownloaderConfiguration) {
private fun handleSourceArtifactDownload(
pkg: Package,
outputDirectory: File,
dryRun: Boolean,
exception: DownloadException
): Provenance? {
val sourceArtifactMark = TimeSource.Monotonic.markNow()

try {
val result = downloadSourceArtifact(pkg.sourceArtifact, outputDirectory)
val result = downloadSourceArtifact(pkg.sourceArtifact, outputDirectory, dryRun)

logger.info {
"Downloaded source code for '${pkg.id.toCoordinates()}' from ${pkg.sourceArtifact} in " +
Expand Down Expand Up @@ -178,7 +182,8 @@ class Downloader(private val config: DownloaderConfiguration) {
fun downloadFromVcs(
pkg: Package,
outputDirectory: File,
recursive: Boolean = true
recursive: Boolean = true,
dryRun: Boolean = false
): Provenance {
if (pkg.vcsProcessed.url.isBlank()) {
val hint = when (pkg.id.type) {
Expand Down Expand Up @@ -243,6 +248,23 @@ class Downloader(private val config: DownloaderConfiguration) {
throw DownloadException("Unsupported VCS type '${pkg.vcsProcessed.type}'.")
}

if (dryRun) {
// TODO: For performance reasons, the current check only works if the VCS revision is present (does not have
// to be guessed) and if the VCS host is recognized. The trick is to do an HTTP HEAD request on the
// archive download URL instead of implementing existence checks for all supported VCS. While this
// does not cover all cases, it works for many cases and is quite fast.
val url = VcsHost.fromUrl(pkg.vcsProcessed.url)?.toArchiveDownloadUrl(pkg.vcsProcessed)
?: throw DownloadException("Unhandled VCS URL ${pkg.vcsProcessed.url}.")

val response = okHttpClient.ping(url)

return if (response.code == HttpURLConnection.HTTP_OK) {
RepositoryProvenance(pkg.vcsProcessed, pkg.vcsProcessed.revision)
} else {
throw DownloadException("Cannot verify existence of ${pkg.vcsProcessed}.")
}
}

val workingTree = try {
applicableVcs.download(pkg, outputDirectory, config.allowMovingRevisions, recursive)
} catch (e: DownloadException) {
Expand Down Expand Up @@ -277,7 +299,11 @@ class Downloader(private val config: DownloaderConfiguration) {
* Download the [sourceArtifact] and unpack it to the [outputDirectory]. A [Provenance] is returned on success or a
* [DownloadException] is thrown in case of failure.
*/
fun downloadSourceArtifact(sourceArtifact: RemoteArtifact, outputDirectory: File): Provenance {
fun downloadSourceArtifact(
sourceArtifact: RemoteArtifact,
outputDirectory: File,
dryRun: Boolean = false
): Provenance {
if (sourceArtifact.url.isBlank()) {
throw DownloadException("No source artifact URL provided.")
}
Expand All @@ -303,6 +329,18 @@ class Downloader(private val config: DownloaderConfiguration) {
}
}

if (dryRun) {
if (sourceArchive.isFile) return ArtifactProvenance(sourceArtifact)

val response = okHttpClient.ping(sourceArtifact.url)

return if (response.code == HttpURLConnection.HTTP_OK) {
ArtifactProvenance(sourceArtifact)
} else {
UnknownProvenance
}
}

if (sourceArtifact.hash.algorithm != HashAlgorithm.NONE) {
if (sourceArtifact.hash.algorithm == HashAlgorithm.UNKNOWN) {
logger.warn {
Expand Down
8 changes: 8 additions & 0 deletions utils/ort/src/main/kotlin/OkHttpClientHelper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,14 @@ fun OkHttpClient.download(url: String, acceptEncoding: String? = null): Result<P
response to body
}

/**
 * Check for the existence of [url] by sending a HEAD-request to it, and return the resulting [Response].
 *
 * NOTE(review): the [Response] is handed back as-is; presumably callers are responsible for closing it. Confirm
 * against the behavior of [execute].
 */
fun OkHttpClient.ping(url: String): Response {
    val headRequest = Request.Builder().url(url).head().build()
    return execute(headRequest)
}

/**
* Execute a [request] using the client.
*/
Expand Down

0 comments on commit 4b8f55d

Please sign in to comment.