From 430fc406c191852e89ab0076108defc3fd144d3d Mon Sep 17 00:00:00 2001 From: kshakir Date: Fri, 10 May 2024 18:06:48 -0400 Subject: [PATCH] WX-1626 Docker soft links (#6741) --- CHANGELOG.md | 7 ++ docs/Configuring.md | 2 +- docs/backends/HPC.md | 71 ++++++++++++++++++- .../backend/sfs/SharedFileSystem.scala | 5 +- .../backend/sfs/SharedFileSystemSpec.scala | 29 ++++++++ 5 files changed, 109 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aecc6361de4..50af73e8260 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Cromwell Change Log +## 88 Release Notes + +### Optional docker soft links + +Cromwell now allows opting into configured soft links on shared file systems such as HPC environments. More details can +be found [here](https://cromwell.readthedocs.io/en/stable/backends/HPC/#optional-docker-soft-links). + ## 87 Release Notes ### `upgrade` command removed from Womtool diff --git a/docs/Configuring.md b/docs/Configuring.md index 5dc2a43d2ca..1ed7a5f53fc 100644 --- a/docs/Configuring.md +++ b/docs/Configuring.md @@ -519,7 +519,7 @@ config section: md5. * `md5`. The well-known md5sum algorithm * Path based options. These are based on filepath. Extremely lightweight, but only work with the `soft-link` file -caching strategy and can therefore never work with containers. +caching strategy and therefore do not work with containers by default. * `path` creates a md5 hash of the path. * `path+modtime` creates a md5 hash of the path and its modification time. * Fingerprinting. This strategy works with containers. diff --git a/docs/backends/HPC.md b/docs/backends/HPC.md index ef4de8d040e..fd40e8bc17d 100644 --- a/docs/backends/HPC.md +++ b/docs/backends/HPC.md @@ -21,13 +21,14 @@ Each `call` has its own subdirectory located at `/call-/inputs` directory. 
There are different localization strategies that Cromwell will try until one works: * `hard-link` - This will create a hard link to the file -* `soft-link` - Create a symbolic link to the file. This strategy is not applicable for tasks which specify a Docker image and will be ignored. +* `soft-link` - Create a symbolic link to the file. This strategy is not enabled by default for tasks which specify a + Docker image and will be ignored. * `copy` - Make a copy the file * `cached-copy` An experimental feature. This copies files to a file cache in `/cached-inputs` and then hard links them in the `/inputs` directory. `cached-copy` is intended for a shared filesystem that runs on multiple physical disks, where docker containers are used. -Hard-links don't work between different physical disks and soft-links don't work with docker. Copying uses a lot of +Hard-links don't work between different physical disks and soft-links don't work with docker by default. Copying uses a lot of space if a multitude of tasks use the same input. `cached-copy` copies the file only once to the physical disk containing the `` and then uses hard links for every task that needs the input file. This can save a lot of space. @@ -45,6 +46,72 @@ filesystems { } ``` +### Optional docker soft links + +By default when Cromwell runs a local container it only mounts the workflow's execution directory. Thus any symbolic or +soft links pointing to files outside of the execution directory will resolve to paths that are not accessible within the +container. + +As discussed above regarding `cached-copy`, `soft-link` is disabled by default on docker and other container +environments, and hard-links do not work across different physical disks. + +However, it is possible to manually configure Cromwell to mount input paths such that soft links resolve outside and +inside containers. 
+ +```hocon +backend { + default = "SlurmDocker" + providers { + SlurmDocker { + actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" + config { + runtime-attributes = """ + String? docker + """ + # https://slurm.schedmd.com/sbatch.html + submit-docker = """ + set -euo pipefail + CACHE_DIR=$HOME/.singularity/cache + mkdir -p $CACHE_DIR + LOCK_FILE=$CACHE_DIR/singularity_pull_flock + DOCKER_NAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< ${docker}) + IMAGE=$CACHE_DIR/$DOCKER_NAME.sif + ( + flock --verbose --exclusive --timeout 900 9 || exit 1 + if [ ! -e "$IMAGE" ]; then + singularity build $IMAGE docker://${docker} + fi + ) 9>$LOCK_FILE + sbatch \ + -J ${job_name} \ + -D ${cwd} \ + -o ${cwd}/execution/stdout \ + -e ${cwd}/execution/stderr \ + --wrap "singularity exec --containall --bind ${cwd}:${docker_cwd} --bind /mnt/one:/mnt/one:ro --bind /mnt/two:/mnt/two:ro $IMAGE ${job_shell} ${docker_script}" + """ + # ... other configuration ... + filesystems { + local { + caching.duplication-strategy = ["copy"] + localization = ["soft-link", "copy"] + docker.allow-soft-links: true + } + } + } + } + } +} +``` + +The important parts of the example configuration above are: +* `config.filesystems.local.docker.allow-soft-links` set to `true` +* `config.submit-docker` containing `--bind /mnt/one:/mnt/one:ro --bind /mnt/two:/mnt/two:ro` + +In this example the two directories `/mnt/one` and `/mnt/two` will also be available within containers at their +original paths outside the container. So soft links pointing to paths under those directories will resolve during the +job execution. Note that if a user runs a workflow using an input file `/mnt/three/path/to/file` the job will fail +during execution as `/mnt/three` was not present inside the running container. 
+ ### Additional FileSystems HPC backends (as well as the Local backend) can be configured to be able to interact with other type of filesystems, where the input files can be located for example. diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala index f1ab13d9234..de22ef88761 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala @@ -115,6 +115,7 @@ trait SharedFileSystem extends PathFactory { def sharedFileSystemConfig: Config lazy val maxHardLinks: Int = sharedFileSystemConfig.getOrElse[Int]("max-hardlinks", 950) // Windows limit 1024. Keep a safe margin. + lazy val dockerAllowSoftLinks: Boolean = sharedFileSystemConfig.getOrElse("docker.allow-soft-links", false) lazy val cachedCopyDir: Option[Path] = None private def localizePathViaCachedCopy(originalPath: Path, executionPath: Path, docker: Boolean): Try[Unit] = { @@ -217,10 +218,10 @@ trait SharedFileSystem extends PathFactory { } private def createStrategies(configStrategies: Seq[String], docker: Boolean): Seq[DuplicationStrategy] = { - // If localizing for a docker job, remove soft-link as an option + // If localizing for a docker job, by default remove soft-link as an option // If no cachedCopyDir is defined, cached-copy can not be used and is removed. 
val filteredConfigStrategies = configStrategies filter { - case "soft-link" if docker => false + case "soft-link" if docker => dockerAllowSoftLinks case "cached-copy" if cachedCopyDir.isEmpty => false case _ => true } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala index 409ef3c281f..602822bac39 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala @@ -29,6 +29,12 @@ class SharedFileSystemSpec private val cachedCopyLocalization = ConfigFactory.parseString(""" localization: [cached-copy] """) private val cachedCopyLocalizationMaxHardlinks = ConfigFactory.parseString("""{localization: [cached-copy], max-hardlinks: 3 }""") + private val softLinkDockerLocalization = ConfigFactory.parseString( + """ + |localization: [soft-link] + |docker.allow-soft-links: true + |""".stripMargin + ) private val localPathBuilder = List(DefaultPathBuilder) def localizationTest(config: Config, @@ -104,6 +110,7 @@ class SharedFileSystemSpec it should "localize a file via symbolic link" in { localizationTest(softLinkLocalization, docker = false, symlink = true) + localizationTest(softLinkDockerLocalization, docker = true, symlink = true) } it should "localize a file via cached copy" in { @@ -182,6 +189,28 @@ class SharedFileSystemSpec dests.foreach(_.delete(swallowIOExceptions = true)) } + it should "throw a fatal exception if docker soft link localization fails" in { + val callDir = DefaultPathBuilder.createTempDirectory("SharedFileSystem") + val orig = DefaultPathBuilder.createTempFile("inputFile") + val testText = + """This is a simple text to check if the localization + | works correctly for the file contents. 
+ |""".stripMargin + orig.touch() + orig.writeText(testText) + + val inputs = fqnWdlMapToDeclarationMap(Map("input" -> WomSingleFile(orig.pathAsString))) + val sharedFS: SharedFileSystem = new SharedFileSystem { + override val pathBuilders: PathBuilders = localPathBuilder + override val sharedFileSystemConfig: Config = softLinkLocalization + + implicit override def actorContext: ActorContext = null + } + val result = sharedFS.localizeInputs(callDir, docker = true)(inputs) + result.isFailure shouldBe true + result.failed.get.isInstanceOf[CromwellFatalExceptionMarker] shouldBe true + } + private[this] def countLinks(file: Path): Int = file.getAttribute("unix:nlink").asInstanceOf[Int] private[this] def isSymLink(file: Path): Boolean = file.isSymbolicLink