[ci] Rebuild Docker images if necessary (apache#11329)

This rebuilds Docker images and uses them in later stages in the same build. If the build is running on `main`, then the images are uploaded to Docker Hub automatically once the run is complete. Images are always rebuilt, but Docker Hub functions as a cache. If there have been no changes to `docker/` since the last available hash on Docker Hub, then the build will just use the images from Hub.
Kathryn-cat · Jun 10, 2022 · dd75257 · dd75257
1 parent 32a6517
commit dd75257
Show file tree

Hide file tree

Showing 12 changed files with 737 additions and 300 deletions.
diff --git a/Jenkinsfile b/Jenkinsfile
diff --git a/jenkins/Build.groovy.j2 b/jenkins/Build.groovy.j2
@@ -52,6 +52,29 @@ def add_hexagon_permissions() {
   {% endfor %}
 }
 
+// Run make. First try to do an incremental make from a previous workspace in hope to
+// accelerate the compilation. If something is wrong, clean the workspace and then
+// build from scratch.
+def make(docker_type, path, make_flag) {
+  timeout(time: max_time, unit: 'MINUTES') {
+    try {
+      cmake_build(docker_type, path, make_flag)
+    } catch (hudson.AbortException ae) {
+      // script exited due to user abort, directly throw instead of retry
+      if (ae.getMessage().contains('script returned exit code 143')) {
+        throw ae
+      }
+      echo 'Incremental compilation failed. Fall back to build from scratch'
+      sh (
+        script: "${docker_run} ${docker_type} ./tests/scripts/task_clean.sh ${path}",
+        label: 'Clear old cmake workspace',
+      )
+      cmake_build(docker_type, path, make_flag)
+    }
+  }
+}
+
+
 def build() {
 stage('Build') {
   environment {

diff --git a/jenkins/Deploy.groovy.j2 b/jenkins/Deploy.groovy.j2
@@ -16,6 +16,25 @@ stage('Build packages') {
 }
 */
 
+
+def update_docker(ecr_image, hub_image) {
+  if (!ecr_image.contains("amazonaws.com")) {
+    sh("echo Skipping '${ecr_image}' since it doesn't look like an ECR image")
+    return
+  }
+  docker_init(ecr_image)
+  sh(
+    script: """
+    set -eux
+    docker tag \
+      ${ecr_image} \
+      ${hub_image}
+    docker push ${hub_image}
+    """,
+    label: "Update ${hub_image} on Docker Hub",
+  )
+}
+
 def deploy_docs() {
   // Note: This code must stay in the Jenkinsfile to ensure that it runs
   // from a trusted context only
@@ -67,5 +86,36 @@ def deploy() {
         }
       }
     }
+    if (env.BRANCH_NAME == 'main' && env.DEPLOY_DOCKER_IMAGES == 'yes' && rebuild_docker_images && upstream_revision != null) {
+      node('CPU') {
+        ws({{ m.per_exec_ws('tvm/deploy-docker') }}) {
+          try {
+            withCredentials([string(
+              credentialsId: 'dockerhub-tlcpackstaging-key',
+              variable: 'DOCKERHUB_KEY',
+            )]) {
+              sh(
+                script: 'docker login -u tlcpackstaging -p ${DOCKERHUB_KEY}',
+                label: 'Log in to Docker Hub',
+              )
+            }
+            def date_Ymd_HMS = sh(
+              script: 'python3 -c \'import datetime; print(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))\'',
+              label: 'Determine date',
+              returnStdout: true,
+            ).trim()
+            def tag = "${date_Ymd_HMS}-${upstream_revision.substring(0, 8)}"
+            {% for image in images %}
+            update_docker({{ image.name }}, "tlcpackstaging/test_{{ image.name }}:${tag}")
+            {% endfor %}
+          } finally {
+            sh(
+              script: 'docker logout',
+              label: 'Clean up login credentials'
+            )
+          }
+        }
+      }
+    }
   }
 }
diff --git a/jenkins/DockerBuild.groovy.j2 b/jenkins/DockerBuild.groovy.j2
@@ -1,166 +1,116 @@
-def build_image(image_name) {
-  hash = sh(
+def ecr_push(full_name) {
+  aws_account_id = sh(
     returnStdout: true,
-    script: 'git log -1 --format=\'%h\''
+    script: 'aws sts get-caller-identity | grep Account | cut -f4 -d\\"',
+    label: 'Get AWS ID'
   ).trim()
-  def full_name = "${image_name}:${env.BRANCH_NAME}-${hash}-${env.BUILD_NUMBER}"
-  sh(
-    script: "${docker_build} ${image_name} --spec ${full_name}",
-    label: 'Build docker image'
-  )
+
+  def ecr_name = "${aws_account_id}.{{ aws_ecr_url }}/${full_name}"
+  try {
+    withEnv([
+      "AWS_ACCOUNT_ID=${aws_account_id}",
+      'AWS_DEFAULT_REGION={{ aws_default_region }}',
+      "AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
+      sh(
+        script: '''
+          set -eux
+          aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ECR_REPO
+        ''',
+        label: 'Log in to ECR'
+      )
+      sh(
+        script: """
+          set -x
+          docker tag ${full_name} \$AWS_ECR_REPO/${full_name}
+          docker push \$AWS_ECR_REPO/${full_name}
+        """,
+        label: 'Upload image to ECR'
+      )
+    }
+  } finally {
+    withEnv([
+      "AWS_ACCOUNT_ID=${aws_account_id}",
+      'AWS_DEFAULT_REGION={{ aws_default_region }}',
+      "AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
+      sh(
+        script: 'docker logout $AWS_ECR_REPO',
+        label: 'Clean up login credentials'
+      )
+    }
+  }
+  return ecr_name
+}
+
+def ecr_pull(full_name) {
   aws_account_id = sh(
     returnStdout: true,
     script: 'aws sts get-caller-identity | grep Account | cut -f4 -d\\"',
     label: 'Get AWS ID'
   ).trim()
 
   try {
-    // Use a credential so Jenkins knows to scrub the AWS account ID which is nice
-    // (but so we don't have to rely it being hardcoded in Jenkins)
-    withCredentials([string(
-      credentialsId: 'aws-account-id',
-      variable: '_ACCOUNT_ID_DO_NOT_USE',
-      )]) {
-      withEnv([
-        "AWS_ACCOUNT_ID=${aws_account_id}",
-        'AWS_DEFAULT_REGION=us-west-2']) {
-        sh(
-          script: '''
-            set -x
-            aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com
-          ''',
-          label: 'Log in to ECR'
-        )
-        sh(
-          script: """
-            set -x
-            docker tag ${full_name} \$AWS_ACCOUNT_ID.dkr.ecr.\$AWS_DEFAULT_REGION.amazonaws.com/${full_name}
-            docker push \$AWS_ACCOUNT_ID.dkr.ecr.\$AWS_DEFAULT_REGION.amazonaws.com/${full_name}
-          """,
-          label: 'Upload image to ECR'
-        )
-      }
+    withEnv([
+      "AWS_ACCOUNT_ID=${aws_account_id}",
+      'AWS_DEFAULT_REGION={{ aws_default_region }}',
+      "AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
+      sh(
+        script: '''
+          set -eux
+          aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ECR_REPO
+        ''',
+        label: 'Log in to ECR'
+      )
+      sh(
+        script: """
+          set -eux
+          docker pull ${full_name}
+        """,
+        label: 'Pull image from ECR'
+      )
     }
   } finally {
-    sh(
-      script: 'rm -f ~/.docker/config.json',
-      label: 'Clean up login credentials'
-    )
+    withEnv([
+      "AWS_ACCOUNT_ID=${aws_account_id}",
+      'AWS_DEFAULT_REGION={{ aws_default_region }}',
+      "AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
+      sh(
+        script: 'docker logout $AWS_ECR_REPO',
+        label: 'Clean up login credentials'
+      )
+    }
   }
+}
+
+def build_image(image_name) {
+  hash = sh(
+    returnStdout: true,
+    script: 'git log -1 --format=\'%h\''
+  ).trim()
+  def full_name = "${image_name}:${env.BRANCH_NAME}-${hash}-${env.BUILD_NUMBER}"
   sh(
-    script: "docker rmi ${full_name}",
-    label: 'Remove docker image'
+    script: "${docker_build} ${image_name} --spec ${full_name}",
+    label: 'Build docker image'
   )
+  return ecr_push(full_name)
 }
 
+
 def build_docker_images() {
   stage('Docker Image Build') {
-    // TODO in a follow up PR: Find ecr tag and use in subsequent builds
-    parallel 'ci-lint': {
-      node('CPU') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_lint')
-        }
-      }
-    }, 'ci-cpu': {
-      node('CPU') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_cpu')
+    parallel(
+    {% for image in images %}
+      '{{ image.name }}': {
+        node('{{ image.platform }}') {
+          timeout(time: max_time, unit: 'MINUTES') {
+            init_git()
+            // We're purposefully not setting the built image here since they
+            // are not yet being uploaded to tlcpack
+            // {{ image.name }} = build_image('{{ image.name }}')
+            build_image('{{ image.name }}')
+          }
         }
-      }
-    }, 'ci-gpu': {
-      node('GPU') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_gpu')
-        }
-      }
-    }, 'ci-qemu': {
-      node('CPU') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_qemu')
-        }
-      }
-    }, 'ci-i386': {
-      node('CPU') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_i386')
-        }
-      }
-    }, 'ci-arm': {
-      node('ARM') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_arm')
-        }
-      }
-    }, 'ci-wasm': {
-      node('CPU') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_wasm')
-        }
-      }
-    }, 'ci-hexagon': {
-      node('CPU') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          docker_init('none')
-          init_git()
-          build_image('ci_hexagon')
-        }
-      }
-    }
-  }
-  // // TODO: Once we are able to use the built images, enable this step
-  // // If the docker images changed, we need to run the image build before the lint
-  // // can run since it requires a base docker image. Most of the time the images
-  // // aren't build though so it's faster to use the same node that checks for
-  // // docker changes to run the lint in the usual case.
-  // stage('Sanity Check (re-run)') {
-  //   timeout(time: max_time, unit: 'MINUTES') {
-  //     node('CPU') {
-  //       ws({{ m.per_exec_ws('tvm/sanity') }}) {
-  //         init_git()
-  //         sh (
-  //           script: "${docker_run} ${ci_lint}  ./tests/scripts/task_lint.sh",
-  //           label: 'Run lint',
-  //         )
-  //       }
-  //     }
-  //   }
-  // }
-}
-
-// Run make. First try to do an incremental make from a previous workspace in hope to
-// accelerate the compilation. If something is wrong, clean the workspace and then
-// build from scratch.
-def make(docker_type, path, make_flag) {
-  timeout(time: max_time, unit: 'MINUTES') {
-    try {
-      cmake_build(docker_type, path, make_flag)
-      // always run cpp test when build
-    } catch (hudson.AbortException ae) {
-      // script exited due to user abort, directly throw instead of retry
-      if (ae.getMessage().contains('script returned exit code 143')) {
-        throw ae
-      }
-      echo 'Incremental compilation failed. Fall back to build from scratch'
-      sh (
-        script: "${docker_run} ${docker_type} ./tests/scripts/task_clean.sh ${path}",
-        label: 'Clear old cmake workspace',
-      )
-      cmake_build(docker_type, path, make_flag)
-    }
+      },
+    {% endfor %}
+    )
   }
 }
diff --git a/jenkins/Jenkinsfile.j2 b/jenkins/Jenkinsfile.j2
@@ -100,6 +100,9 @@ if (currentBuild.getBuildCauses().toString().contains('BranchIndexingCause')) {
 {% set hexagon_api = ['build/hexagon_api_output',] %}
 s3_prefix = "tvm-jenkins-artifacts-prod/tvm/${env.BRANCH_NAME}/${env.BUILD_NUMBER}"
 
+{% set aws_default_region = "us-west-2" %}
+{% set aws_ecr_url = "dkr.ecr." + aws_default_region + ".amazonaws.com" %}
+
 // General note: Jenkins has limits on the size of a method (or top level code)
 // that are pretty strict, so most usage of groovy methods in these templates
 // are purely to satisfy the JVM

diff --git a/jenkins/Lint.groovy.j2 b/jenkins/Lint.groovy.j2
@@ -2,11 +2,11 @@ def lint() {
   stage('Lint') {
     parallel(
       {% call m.sharded_lint_step(
-        name='Lint',
-        num_shards=2,
-        node='CPU-SMALL',
-        ws='tvm/lint',
-        docker_image='ci_lint',
+          name='Lint',
+          num_shards=2,
+          node='CPU-SMALL',
+          ws='tvm/lint',
+          docker_image='ci_lint',
         )
       %}
         sh (

diff --git a/jenkins/Prepare.groovy.j2 b/jenkins/Prepare.groovy.j2
@@ -69,6 +69,17 @@ def docker_init(image) {
     """,
     label: 'Clean old Docker images',
   )
+
+  if (image.contains("amazonaws.com")) {
+    // If this string is in the image name it's from ECR and needs to be pulled
+    // with the right credentials
+    ecr_pull(image)
+  } else {
+    sh(
+      script: "docker pull ${image}",
+      label: 'Pull docker image',
+    )
+  }
 }
 
 def should_skip_slow_tests(pr_number) {