From 4dd6a56e9fd78caf802b368988aecddc40fcaeaf Mon Sep 17 00:00:00 2001 From: Oliver Heger Date: Thu, 2 Feb 2023 12:46:22 +0100 Subject: [PATCH] feat(PackageManager): Support path excludes in findManagedFiles() This allows skipping projects in specific paths already before starting the analysis. That way problematic / large projects that are irrelevant compliance-wise can be excluded leading to reduced resource usage and analysis time. Signed-off-by: Oliver Heger --- analyzer/src/main/kotlin/PackageManager.kt | 31 +++++++- .../src/test/kotlin/PackageManagerTest.kt | 77 +++++++++++++++---- 2 files changed, 89 insertions(+), 19 deletions(-) diff --git a/analyzer/src/main/kotlin/PackageManager.kt b/analyzer/src/main/kotlin/PackageManager.kt index ebbd92916131..da77ea6eaec4 100644 --- a/analyzer/src/main/kotlin/PackageManager.kt +++ b/analyzer/src/main/kotlin/PackageManager.kt @@ -27,6 +27,7 @@ import java.nio.file.Path import java.nio.file.SimpleFileVisitor import java.nio.file.attribute.BasicFileAttributes +import kotlin.io.path.pathString import kotlin.time.measureTime import org.apache.logging.log4j.kotlin.Logging @@ -93,20 +94,25 @@ abstract class PackageManager( /** * Recursively search the [directory] for files managed by any of the [packageManagers]. The search is performed - * depth-first so that root project files are found before any subproject files for a specific manager. + * depth-first so that root project files are found before any subproject files for a specific manager. Path + * excludes defined by the given [excludes] are taken into account; the corresponding directories are skipped. 
*/ fun findManagedFiles( directory: File, - packageManagers: Collection = ALL.values + packageManagers: Collection = ALL.values, + excludes: Excludes = Excludes.EMPTY ): ManagedProjectFiles { require(directory.isDirectory) { "The provided path is not a directory: ${directory.absolutePath}" } + logger.debug { "Searching for managed files using the following excludes: $excludes" } + val result = mutableMapOf>() + val rootPath = directory.toPath() Files.walkFileTree( - directory.toPath(), + rootPath, object : SimpleFileVisitor() { override fun preVisitDirectory(dir: Path, attributes: BasicFileAttributes): FileVisitResult { if (IGNORED_DIRECTORY_MATCHERS.any { it.matches(dir) }) { @@ -117,6 +123,14 @@ abstract class PackageManager( return FileVisitResult.SKIP_SUBTREE } + if (excludes.isPathExcluded(rootPath, dir)) { + logger.info { + "Not analyzing directory '$dir' as it is excluded." + } + + return FileVisitResult.SKIP_SUBTREE + } + val dirAsFile = dir.toFile() // Note that although FileVisitOption.FOLLOW_LINKS is not set, this would still follow junctions @@ -126,7 +140,9 @@ abstract class PackageManager( return FileVisitResult.SKIP_SUBTREE } - val filesInDir = dirAsFile.walk().maxDepth(1).filter { it.isFile }.toList() + val filesInDir = dirAsFile.walk().maxDepth(1).filter { + it.isFile && !excludes.isPathExcluded(rootPath, it.toPath()) + }.toList() packageManagers.distinct().forEach { manager -> // Create a list of lists of matching files per glob. @@ -209,6 +225,13 @@ abstract class PackageManager( */ internal fun AnalyzerConfiguration.excludes(repositoryConfiguration: RepositoryConfiguration): Excludes = repositoryConfiguration.excludes.takeIf { skipExcluded } ?: Excludes.EMPTY + + /** + * Check whether the given [path], relative to [root] and using '/' separators, is matched by a path exclude in + * this [Excludes] object. Separators are normalized so the checked string is the same on every platform. 
+ */ + private fun Excludes.isPathExcluded(root: Path, path: Path): Boolean = + isPathExcluded(root.relativize(path).pathString.replace(File.separatorChar, '/')) } /** diff --git a/analyzer/src/test/kotlin/PackageManagerTest.kt b/analyzer/src/test/kotlin/PackageManagerTest.kt index 2badb4e7da2c..cdfbd44e81c3 100644 --- a/analyzer/src/test/kotlin/PackageManagerTest.kt +++ b/analyzer/src/test/kotlin/PackageManagerTest.kt @@ -29,10 +29,16 @@ import io.kotest.matchers.maps.beEmpty import io.kotest.matchers.should import io.kotest.matchers.shouldBe +import java.io.File + import org.ossreviewtoolkit.analyzer.managers.* import org.ossreviewtoolkit.model.VcsInfo import org.ossreviewtoolkit.model.VcsType +import org.ossreviewtoolkit.model.config.Excludes +import org.ossreviewtoolkit.model.config.PathExclude +import org.ossreviewtoolkit.model.config.PathExcludeReason import org.ossreviewtoolkit.utils.test.createSpecTempDir +import org.ossreviewtoolkit.utils.test.createTestTempDir class PackageManagerTest : WordSpec({ val definitionFiles = listOf( @@ -71,11 +77,7 @@ class PackageManagerTest : WordSpec({ val projectDir = createSpecTempDir() beforeSpec { - definitionFiles.forEach { file -> - projectDir.resolve(file).also { dir -> - dir.parentFile.mkdirs() - }.writeText("Dummy text to avoid the file to be empty, as empty files are skipped.") - } + definitionFiles.writeFiles(projectDir) } "findManagedFiles" should { @@ -88,11 +90,7 @@ class PackageManagerTest : WordSpec({ it is Unmanaged.Factory } - // The keys in expected and actual maps of definition files are different instances of package manager - // factories. So to compare values use the package manager types as keys instead. 
- val managedFilesByName = managedFiles.map { (manager, files) -> - manager.type to files.map { it.relativeTo(projectDir).invariantSeparatorsPath } - }.toMap() + val managedFilesByName = managedFiles.groupByName(projectDir) assertSoftly { managedFilesByName["Bower"] should containExactly("bower/bower.json") @@ -144,11 +142,7 @@ class PackageManagerTest : WordSpec({ managedFiles.size shouldBe 3 - // The keys in expected and actual maps of definition files are different instances of package manager - // factories. So to compare values use the package manager types as keys instead. - val managedFilesByName = managedFiles.map { (manager, files) -> - manager.type to files.map { it.relativeTo(projectDir).invariantSeparatorsPath } - }.toMap() + val managedFilesByName = managedFiles.groupByName(projectDir) managedFilesByName["Gradle"] should containExactlyInAnyOrder( "gradle-groovy/build.gradle", @@ -167,6 +161,38 @@ class PackageManagerTest : WordSpec({ managedFiles should beEmpty() } + "take path excludes into account" { + val tempDir = "test/" + val definitionFilesWithExcludes = definitionFiles + + listOf("pom.xml", "build.gradle", "build.sbt").map { "$tempDir$it" } + val rootDir = createTestTempDir() + definitionFilesWithExcludes.writeFiles(rootDir) + + val pathExclude = PathExclude("$tempDir**", PathExcludeReason.TEST_OF) + val excludes = Excludes(paths = listOf(pathExclude)) + + val managedFilesByName = PackageManager.findManagedFiles(rootDir, excludes = excludes).groupByName(rootDir) + + managedFilesByName["Gradle"] should containExactlyInAnyOrder( + "gradle-groovy/build.gradle", + "gradle-kotlin/build.gradle.kts" + ) + managedFilesByName["Maven"] should containExactly("maven/pom.xml") + managedFilesByName["SBT"] should containExactly("sbt/build.sbt") + } + + "handle specific excluded definition files" { + val pathExclude = PathExclude("gradle-groovy/build.gradle", PathExcludeReason.OTHER) + val excludes = Excludes(paths = listOf(pathExclude)) + + val managedFiles = 
PackageManager.findManagedFiles(projectDir, excludes = excludes) + val managedFilesByName = managedFiles.groupByName(projectDir) + + managedFilesByName["Gradle"] should containExactly( + "gradle-kotlin/build.gradle.kts" + ) + } + "fail if the provided file is not a directory" { shouldThrow { PackageManager.findManagedFiles(projectDir.resolve("pom.xml")) @@ -221,3 +247,24 @@ class PackageManagerTest : WordSpec({ } } }) + +/** + * Transform this map with definition files grouped by package manager factories, so that the results of specific + * package managers can be easily accessed. The keys in expected and actual maps of definition files are different + * instances of package manager factories. So to compare values use the package manager types as keys instead. + */ +private fun ManagedProjectFiles.groupByName(projectDir: File) = + map { (manager, files) -> + manager.type to files.map { it.relativeTo(projectDir).invariantSeparatorsPath } + }.toMap() + +/** + * Create files with a dummy content in the given [directory] for all the path names in this collection. + */ +private fun Collection.writeFiles(directory: File) { + forEach { file -> + directory.resolve(file).also { dir -> + dir.parentFile.mkdirs() + }.writeText("Dummy text to avoid the file to be empty, as empty files are skipped.") + } +}