Skip to content

Commit

Permalink
refactor: Make ProvenanceFileStorage operate on stream instead of f…
Browse files Browse the repository at this point in the history
…iles

This avoids the temporary creation of files as well as potential confusion
about persisting the file name, and paves the way for storages that
cannot operate on files but on streams.

Resolves #7118.

Signed-off-by: Sebastian Schuberth <sschuberth@gmail.com>
  • Loading branch information
sschuberth committed Jun 19, 2023
1 parent 03aa926 commit 992cd8f
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@

package org.ossreviewtoolkit.model.utils

import io.kotest.core.TestConfiguration
import io.kotest.core.spec.style.WordSpec
import io.kotest.engine.spec.tempfile
import io.kotest.matchers.shouldBe

import java.io.File
import java.io.InputStream

import org.ossreviewtoolkit.model.ArtifactProvenance
import org.ossreviewtoolkit.model.Hash
Expand All @@ -46,18 +44,6 @@ private val VCS_PROVENANCE = RepositoryProvenance(
resolvedRevision = "0000000000000000000000000000000000000000"
)

private fun File.readTextAndDelete(): String {
val text = readText()
delete()

return text
}

private fun TestConfiguration.writeTempFile(content: String): File =
tempfile().apply {
writeText(content)
}

class PostgresProvenanceFileStorageFunTest : WordSpec({
val postgresListener = PostgresListener()
lateinit var storage: PostgresProvenanceFileStorage
Expand All @@ -68,35 +54,35 @@ class PostgresProvenanceFileStorageFunTest : WordSpec({
storage = PostgresProvenanceFileStorage(postgresListener.dataSource, FileArchiverConfiguration.TABLE_NAME)
}

"hasFile()" should {
"return false when no file for the given provenance has been added" {
storage.hasFile(VCS_PROVENANCE) shouldBe false
"hasData()" should {
"return false when no data for the given provenance has been added" {
storage.hasData(VCS_PROVENANCE) shouldBe false
}

"return true when a file for the given provenance has been added" {
storage.putFile(VCS_PROVENANCE, writeTempFile("content"))
"return true when data for the given provenance has been added" {
storage.putData(VCS_PROVENANCE, InputStream.nullInputStream())

storage.hasFile(VCS_PROVENANCE) shouldBe true
storage.hasData(VCS_PROVENANCE) shouldBe true
}
}

"putFile()" should {
"return the file corresponding to the given provenance given such file has been added" {
storage.putFile(VCS_PROVENANCE, writeTempFile("VCS"))
storage.putFile(SOURCE_ARTIFACT_PROVENANCE, writeTempFile("source artifact"))
"putData()" should {
"return the data corresponding to the given provenance given such data has been added" {
storage.putData(VCS_PROVENANCE, "VCS".byteInputStream())
storage.putData(SOURCE_ARTIFACT_PROVENANCE, "source artifact".byteInputStream())

storage.getFile(VCS_PROVENANCE) shouldNotBeNull { readTextAndDelete() shouldBe "VCS" }
storage.getFile(SOURCE_ARTIFACT_PROVENANCE) shouldNotBeNull {
readTextAndDelete() shouldBe "source artifact"
storage.getData(VCS_PROVENANCE) shouldNotBeNull { String(use { readBytes() }) shouldBe "VCS" }
storage.getData(SOURCE_ARTIFACT_PROVENANCE) shouldNotBeNull {
String(use { readBytes() }) shouldBe "source artifact"
}
}

"return the overwritten file corresponding to the given provenance" {
storage.putFile(SOURCE_ARTIFACT_PROVENANCE, writeTempFile("source artifact"))
storage.putFile(SOURCE_ARTIFACT_PROVENANCE, writeTempFile("source artifact updated"))
storage.putData(SOURCE_ARTIFACT_PROVENANCE, "source artifact".byteInputStream())
storage.putData(SOURCE_ARTIFACT_PROVENANCE, "source artifact updated".byteInputStream())

storage.getFile(SOURCE_ARTIFACT_PROVENANCE) shouldNotBeNull {
readTextAndDelete() shouldBe "source artifact updated"
storage.getData(SOURCE_ARTIFACT_PROVENANCE) shouldNotBeNull {
String(use { readBytes() }) shouldBe "source artifact updated"
}
}
}
Expand Down
27 changes: 11 additions & 16 deletions model/src/main/kotlin/utils/FileArchiver.kt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.ossreviewtoolkit.model.utils

import java.io.File
import java.io.IOException

import kotlin.time.measureTime
import kotlin.time.measureTimedValue
Expand Down Expand Up @@ -63,7 +62,7 @@ class FileArchiver(
/**
* Return whether an archive corresponding to [provenance] exists.
*/
fun hasArchive(provenance: KnownProvenance): Boolean = storage.hasFile(provenance)
fun hasArchive(provenance: KnownProvenance): Boolean = storage.hasData(provenance)

Check warning on line 65 in model/src/main/kotlin/utils/FileArchiver.kt

View check run for this annotation

Codecov / codecov/patch

model/src/main/kotlin/utils/FileArchiver.kt#L65

Added line #L65 was not covered by tests

/**
* Archive all files in [directory] matching any of the configured patterns in the [storage].
Expand Down Expand Up @@ -91,7 +90,7 @@ class FileArchiver(

logger.info { "Archived directory '$directory' in $zipDuration." }

val writeDuration = measureTime { storage.putFile(provenance, zipFile) }
val writeDuration = measureTime { storage.putData(provenance, zipFile.inputStream()) }

logger.info { "Wrote archive of directory '$directory' to storage in $writeDuration." }

Expand All @@ -102,26 +101,22 @@ class FileArchiver(
* Unarchive the archive corresponding to [provenance].
*/
fun unarchive(directory: File, provenance: KnownProvenance): Boolean {
val (zipFile, readDuration) = measureTimedValue { storage.getFile(provenance) }
val (zipInputStream, readDuration) = measureTimedValue { storage.getData(provenance) }

logger.info { "Read archive of directory '$directory' from storage in $readDuration." }

if (zipFile == null) return false
if (zipInputStream == null) return false

return try {
val unzipDuration = measureTime { zipFile.unpackZip(directory) }
return runCatching {
val unzipDuration = measureTime { zipInputStream.unpackZip(directory) }

logger.info { "Unarchived directory '$directory' in $unzipDuration." }
logger.info { "Unarchived data for $provenance to '$directory' in $unzipDuration." }

true
} catch (e: IOException) {
e.showStackTrace()
}.onFailure {
it.showStackTrace()

Check warning on line 117 in model/src/main/kotlin/utils/FileArchiver.kt

View check run for this annotation

Codecov / codecov/patch

model/src/main/kotlin/utils/FileArchiver.kt#L117

Added line #L117 was not covered by tests

logger.error { "Could not extract ${zipFile.absolutePath}: ${e.collectMessages()}" }

false
} finally {
zipFile.delete()
}
logger.error { "Failed to unarchive data for $provenance: ${it.collectMessages()}" }

Check warning on line 119 in model/src/main/kotlin/utils/FileArchiver.kt

View check run for this annotation

Codecov / codecov/patch

model/src/main/kotlin/utils/FileArchiver.kt#L119

Added line #L119 was not covered by tests
}.isSuccess
}
}
32 changes: 10 additions & 22 deletions model/src/main/kotlin/utils/FileProvenanceFileStorage.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@

package org.ossreviewtoolkit.model.utils

import java.io.File
import java.io.IOException
import java.io.InputStream

import org.apache.logging.log4j.kotlin.Logging

Expand All @@ -29,7 +28,6 @@ import org.ossreviewtoolkit.model.HashAlgorithm
import org.ossreviewtoolkit.model.KnownProvenance
import org.ossreviewtoolkit.model.RepositoryProvenance
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.ort.createOrtTempFile
import org.ossreviewtoolkit.utils.ort.storage.FileStorage

/**
Expand All @@ -55,34 +53,24 @@ class FileProvenanceFileStorage(
}
}

override fun hasFile(provenance: KnownProvenance): Boolean {
override fun hasData(provenance: KnownProvenance): Boolean {
val filePath = getFilePath(provenance)

return storage.exists(filePath)
}

override fun putFile(provenance: KnownProvenance, file: File) {
storage.write(getFilePath(provenance), file.inputStream())
override fun putData(provenance: KnownProvenance, data: InputStream) {
storage.write(getFilePath(provenance), data)
}

override fun getFile(provenance: KnownProvenance): File? {
override fun getData(provenance: KnownProvenance): InputStream? {
val filePath = getFilePath(provenance)

val file = createOrtTempFile(suffix = File(filename).extension)

return try {
storage.read(filePath).use { inputStream ->
file.outputStream().use { outputStream ->
inputStream.copyTo(outputStream)
}
}

file
} catch (e: IOException) {
logger.error { "Could not read from $filePath: ${e.collectMessages()}" }

null
}
return runCatching {
storage.read(filePath)
}.onFailure {
logger.error { "Could not read from $filePath: ${it.collectMessages()}" }

Check warning on line 72 in model/src/main/kotlin/utils/FileProvenanceFileStorage.kt

View check run for this annotation

Codecov / codecov/patch

model/src/main/kotlin/utils/FileProvenanceFileStorage.kt#L72

Added line #L72 was not covered by tests
}.getOrNull()
}

private fun getFilePath(provenance: KnownProvenance): String = "${provenance.hash()}/$filename"
Expand Down
27 changes: 8 additions & 19 deletions model/src/main/kotlin/utils/PostgresProvenanceFileStorage.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@

package org.ossreviewtoolkit.model.utils

import java.io.File
import java.io.IOException
import java.io.InputStream

import javax.sql.DataSource

Expand All @@ -44,7 +43,6 @@ import org.ossreviewtoolkit.model.RepositoryProvenance
import org.ossreviewtoolkit.model.utils.DatabaseUtils.checkDatabaseEncoding
import org.ossreviewtoolkit.model.utils.DatabaseUtils.tableExists
import org.ossreviewtoolkit.model.utils.DatabaseUtils.transaction
import org.ossreviewtoolkit.utils.ort.createOrtTempFile

/**
* A [DataSource]-based implementation of [ProvenanceFileStorage] that stores files associated by [KnownProvenance] in a
Expand Down Expand Up @@ -81,27 +79,27 @@ class PostgresProvenanceFileStorage(
}
}

override fun hasFile(provenance: KnownProvenance): Boolean =
override fun hasData(provenance: KnownProvenance): Boolean =
database.transaction {
table.slice(table.provenance.count()).select {
table.provenance eq provenance.storageKey()
}.first()[table.provenance.count()].toInt()
} == 1

override fun putFile(provenance: KnownProvenance, file: File) {
override fun putData(provenance: KnownProvenance, data: InputStream) {
database.transaction {
table.deleteWhere {
table.provenance eq provenance.storageKey()
}

table.insert {
it[this.provenance] = provenance.storageKey()
it[zipData] = file.readBytes()
table.insert { statement ->
statement[this.provenance] = provenance.storageKey()
statement[zipData] = data.use { it.readBytes() }
}
}
}

override fun getFile(provenance: KnownProvenance): File? {
override fun getData(provenance: KnownProvenance): InputStream? {
val bytes = database.transaction {
table.select {
table.provenance eq provenance.storageKey()
Expand All @@ -110,16 +108,7 @@ class PostgresProvenanceFileStorage(
}.firstOrNull()
} ?: return null

val file = createOrtTempFile(suffix = ".zip")

try {
file.writeBytes(bytes)
} catch (e: IOException) {
file.delete()
throw e
}

return file
return bytes.inputStream()
}
}

Expand Down
19 changes: 10 additions & 9 deletions model/src/main/kotlin/utils/ProvenanceFileStorage.kt
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,28 @@
*/
package org.ossreviewtoolkit.model.utils

import java.io.File
import java.io.InputStream

import org.ossreviewtoolkit.model.KnownProvenance

/**
* A generic storage interface that associates a [KnownProvenance] with a file.
* A generic storage interface that associates a [KnownProvenance] with a stream of data.
*/
interface ProvenanceFileStorage {
/**
* Return whether a file is associated by [provenance].
* Return whether any data is associated by [provenance].
*/
fun hasFile(provenance: KnownProvenance): Boolean
fun hasData(provenance: KnownProvenance): Boolean

/**
* Associate [provenance] with the given [file]. Overwrites any existing association by [provenance].
* Associate [provenance] with the given [data]. Replaces any existing association by [provenance]. The function
* implementation is responsible for closing the stream.
*/
fun putFile(provenance: KnownProvenance, file: File)
fun putData(provenance: KnownProvenance, data: InputStream)

/**
* Return the file associated by [provenance], or null if there is no such file. Note that the returned file is a
* temporary file that the caller is responsible for.
* Return the data associated by [provenance], or null if there is no such data. Note that it is the responsibility
* of the caller to close the stream.
*/
fun getFile(provenance: KnownProvenance): File?
fun getData(provenance: KnownProvenance): InputStream?
}
20 changes: 6 additions & 14 deletions scanner/src/main/kotlin/utils/FileListResolver.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,16 @@ import com.fasterxml.jackson.module.kotlin.readValue

import java.io.File

import org.ossreviewtoolkit.model.FileFormat
import org.ossreviewtoolkit.model.HashAlgorithm
import org.ossreviewtoolkit.model.KnownProvenance
import org.ossreviewtoolkit.model.toYaml
import org.ossreviewtoolkit.model.utils.ProvenanceFileStorage
import org.ossreviewtoolkit.model.writeValue
import org.ossreviewtoolkit.model.yamlMapper
import org.ossreviewtoolkit.scanner.FileList
import org.ossreviewtoolkit.scanner.FileList.FileEntry
import org.ossreviewtoolkit.scanner.provenance.ProvenanceDownloader
import org.ossreviewtoolkit.utils.common.FileMatcher
import org.ossreviewtoolkit.utils.common.VCS_DIRECTORIES
import org.ossreviewtoolkit.utils.ort.createOrtTempFile

internal class FileListResolver(
private val storage: ProvenanceFileStorage,
Expand All @@ -47,23 +46,16 @@ internal class FileListResolver(
}
}

fun has(provenance: KnownProvenance): Boolean = storage.hasFile(provenance)
fun has(provenance: KnownProvenance): Boolean = storage.hasData(provenance)
}

private fun ProvenanceFileStorage.putFileList(provenance: KnownProvenance, fileList: FileList) {
val tempFile = createOrtTempFile(prefix = "file-list", suffix = ".yml")

tempFile.writeValue(fileList)
putFile(provenance, tempFile)

tempFile.delete()
putData(provenance, fileList.toYaml().byteInputStream())
}

private fun ProvenanceFileStorage.getFileList(provenance: KnownProvenance): FileList? {
val file = getFile(provenance) ?: return null

// Cannot rely on the extension of the file to reflect its type, so use YAML explicitly.
return FileFormat.YAML.mapper.readValue<FileList>(file).also { file.delete() }
val data = getData(provenance) ?: return null
return data.use { yamlMapper.readValue<FileList>(it) }
}

private val IGNORED_DIRECTORY_MATCHER by lazy {
Expand Down

0 comments on commit 992cd8f

Please sign in to comment.