Skip to content

Commit

Permalink
[ci] Rebuild Docker images if necessary (apache#11329)
Browse files Browse the repository at this point in the history
This rebuilds Docker images and uses them in later stages in the same build. If the build is running on `main`, then the images are uploaded to Docker Hub automatically once the run is complete. Images are always rebuilt, but Docker Hub functions as a cache. If there have been no changes to `docker/` since the last available hash on Docker Hub, then the build will just use the images from Hub.
  • Loading branch information
driazati authored and Kathryn-cat committed Jun 10, 2022
1 parent 32a6517 commit dd75257
Show file tree
Hide file tree
Showing 12 changed files with 737 additions and 300 deletions.
393 changes: 254 additions & 139 deletions Jenkinsfile

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions jenkins/Build.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,29 @@ def add_hexagon_permissions() {
{% endfor %}
}

// Build `path` with cmake inside the `docker_type` image.
//
// An incremental build on top of whatever a previous run left in the workspace
// is attempted first, in the hope of speeding up compilation. If that attempt
// fails for any reason other than the script being terminated, the workspace is
// cleaned with tests/scripts/task_clean.sh and the build is run once more from
// scratch. The whole sequence is bounded by `max_time` minutes.
//
// docker_type: Docker image the build and clean commands run in
// path:        workspace path handed to cmake_build and task_clean.sh
// make_flag:   forwarded unchanged to cmake_build
def make(docker_type, path, make_flag) {
  timeout(time: max_time, unit: 'MINUTES') {
    try {
      cmake_build(docker_type, path, make_flag)
    } catch (hudson.AbortException ae) {
      // Exit code 143 (128 + SIGTERM) means the script exited due to an abort:
      // rethrow directly instead of retrying. The message may be null, so use
      // safe navigation rather than letting a NullPointerException hide the
      // original failure and skip the fallback below.
      if (ae.getMessage()?.contains('script returned exit code 143')) {
        throw ae
      }
      echo 'Incremental compilation failed. Fall back to build from scratch'
      sh (
        script: "${docker_run} ${docker_type} ./tests/scripts/task_clean.sh ${path}",
        label: 'Clear old cmake workspace',
      )
      cmake_build(docker_type, path, make_flag)
    }
  }
}


def build() {
stage('Build') {
environment {
Expand Down
50 changes: 50 additions & 0 deletions jenkins/Deploy.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@ stage('Build packages') {
}
*/


// Re-tag an image from ECR as `hub_image` and push it to Docker Hub.
//
// ecr_image: image reference to publish; anything whose name does not contain
//            "amazonaws.com" is treated as not coming from ECR and is skipped
// hub_image: full Docker Hub name (repository:tag) to push the image as
//
// `docker push` relies on Docker Hub credentials already being present on the
// node; this function does not log in itself.
def update_docker(ecr_image, hub_image) {
  if (!ecr_image.contains("amazonaws.com")) {
    // Use the pipeline `echo` step instead of `sh("echo ...")`: when passed to a
    // shell, the apostrophe in "doesn't" starts an unterminated quoted string
    // and the step fails instead of skipping.
    echo "Skipping '${ecr_image}' since it doesn't look like an ECR image"
    return
  }
  // Make sure the image is present locally before tagging it.
  docker_init(ecr_image)
  sh(
    script: """
      set -eux
      docker tag \
        ${ecr_image} \
        ${hub_image}
      docker push ${hub_image}
    """,
    label: "Update ${hub_image} on Docker Hub",
  )
}

def deploy_docs() {
// Note: This code must stay in the Jenkinsfile to ensure that it runs
// from a trusted context only
Expand Down Expand Up @@ -67,5 +86,36 @@ def deploy() {
}
}
}
// Publish the Docker images used by this run to the tlcpackstaging Docker Hub
// account. Only happens on `main`, when deployment is enabled, when images were
// rebuilt, and when the upstream revision is known.
if (env.BRANCH_NAME == 'main' && env.DEPLOY_DOCKER_IMAGES == 'yes' && rebuild_docker_images && upstream_revision != null) {
  node('CPU') {
    ws({{ m.per_exec_ws('tvm/deploy-docker') }}) {
      try {
        withCredentials([string(
          credentialsId: 'dockerhub-tlcpackstaging-key',
          variable: 'DOCKERHUB_KEY',
        )]) {
          // The script is single-quoted so the shell (not Groovy) expands the
          // secret, and it is fed through stdin so it never appears in the
          // process argument list (`docker login -p` is flagged as insecure by
          // Docker itself).
          sh(
            script: 'printf "%s" "${DOCKERHUB_KEY}" | docker login -u tlcpackstaging --password-stdin',
            label: 'Log in to Docker Hub',
          )
        }
        def date_Ymd_HMS = sh(
          script: 'python3 -c \'import datetime; print(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))\'',
          label: 'Determine date',
          returnStdout: true,
        ).trim()
        // Tag format: <date>-<time>-<first 8 characters of the upstream revision>
        def tag = "${date_Ymd_HMS}-${upstream_revision.substring(0, 8)}"
        {% for image in images %}
        update_docker({{ image.name }}, "tlcpackstaging/test_{{ image.name }}:${tag}")
        {% endfor %}
      } finally {
        // Always drop the Docker Hub login from the node, even if a push failed.
        sh(
          script: 'docker logout',
          label: 'Clean up login credentials'
        )
      }
    }
  }
}
}
}
240 changes: 95 additions & 145 deletions jenkins/DockerBuild.groovy.j2
Original file line number Diff line number Diff line change
@@ -1,166 +1,116 @@
def build_image(image_name) {
hash = sh(
def ecr_push(full_name) {
aws_account_id = sh(
returnStdout: true,
script: 'git log -1 --format=\'%h\''
script: 'aws sts get-caller-identity | grep Account | cut -f4 -d\\"',
label: 'Get AWS ID'
).trim()
def full_name = "${image_name}:${env.BRANCH_NAME}-${hash}-${env.BUILD_NUMBER}"
sh(
script: "${docker_build} ${image_name} --spec ${full_name}",
label: 'Build docker image'
)

def ecr_name = "${aws_account_id}.{{ aws_ecr_url }}/${full_name}"
try {
withEnv([
"AWS_ACCOUNT_ID=${aws_account_id}",
'AWS_DEFAULT_REGION={{ aws_default_region }}',
"AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
sh(
script: '''
set -eux
aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ECR_REPO
''',
label: 'Log in to ECR'
)
sh(
script: """
set -x
docker tag ${full_name} \$AWS_ECR_REPO/${full_name}
docker push \$AWS_ECR_REPO/${full_name}
""",
label: 'Upload image to ECR'
)
}
} finally {
withEnv([
"AWS_ACCOUNT_ID=${aws_account_id}",
'AWS_DEFAULT_REGION={{ aws_default_region }}',
"AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
sh(
script: 'docker logout $AWS_ECR_REPO',
label: 'Clean up login credentials'
)
}
}
return ecr_name
}

def ecr_pull(full_name) {
aws_account_id = sh(
returnStdout: true,
script: 'aws sts get-caller-identity | grep Account | cut -f4 -d\\"',
label: 'Get AWS ID'
).trim()

try {
// Use a credential so Jenkins knows to scrub the AWS account ID, which is nice
// (but so we don't have to rely on it being hardcoded in Jenkins)
withCredentials([string(
credentialsId: 'aws-account-id',
variable: '_ACCOUNT_ID_DO_NOT_USE',
)]) {
withEnv([
"AWS_ACCOUNT_ID=${aws_account_id}",
'AWS_DEFAULT_REGION=us-west-2']) {
sh(
script: '''
set -x
aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com
''',
label: 'Log in to ECR'
)
sh(
script: """
set -x
docker tag ${full_name} \$AWS_ACCOUNT_ID.dkr.ecr.\$AWS_DEFAULT_REGION.amazonaws.com/${full_name}
docker push \$AWS_ACCOUNT_ID.dkr.ecr.\$AWS_DEFAULT_REGION.amazonaws.com/${full_name}
""",
label: 'Upload image to ECR'
)
}
withEnv([
"AWS_ACCOUNT_ID=${aws_account_id}",
'AWS_DEFAULT_REGION={{ aws_default_region }}',
"AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
sh(
script: '''
set -eux
aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ECR_REPO
''',
label: 'Log in to ECR'
)
sh(
script: """
set -eux
docker pull ${full_name}
""",
label: 'Pull image from ECR'
)
}
} finally {
sh(
script: 'rm -f ~/.docker/config.json',
label: 'Clean up login credentials'
)
withEnv([
"AWS_ACCOUNT_ID=${aws_account_id}",
'AWS_DEFAULT_REGION={{ aws_default_region }}',
"AWS_ECR_REPO=${aws_account_id}.{{ aws_ecr_url }}"]) {
sh(
script: 'docker logout $AWS_ECR_REPO',
label: 'Clean up login credentials'
)
}
}
}

def build_image(image_name) {
hash = sh(
returnStdout: true,
script: 'git log -1 --format=\'%h\''
).trim()
def full_name = "${image_name}:${env.BRANCH_NAME}-${hash}-${env.BUILD_NUMBER}"
sh(
script: "docker rmi ${full_name}",
label: 'Remove docker image'
script: "${docker_build} ${image_name} --spec ${full_name}",
label: 'Build docker image'
)
return ecr_push(full_name)
}


def build_docker_images() {
stage('Docker Image Build') {
// TODO in a follow up PR: Find ecr tag and use in subsequent builds
parallel 'ci-lint': {
node('CPU') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_lint')
}
}
}, 'ci-cpu': {
node('CPU') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_cpu')
parallel(
{% for image in images %}
'{{ image.name }}': {
node('{{ image.platform }}') {
timeout(time: max_time, unit: 'MINUTES') {
init_git()
// We're purposefully not setting the built image here since they
// are not yet being uploaded to tlcpack
// {{ image.name }} = build_image('{{ image.name }}')
build_image('{{ image.name }}')
}
}
}
}, 'ci-gpu': {
node('GPU') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_gpu')
}
}
}, 'ci-qemu': {
node('CPU') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_qemu')
}
}
}, 'ci-i386': {
node('CPU') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_i386')
}
}
}, 'ci-arm': {
node('ARM') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_arm')
}
}
}, 'ci-wasm': {
node('CPU') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_wasm')
}
}
}, 'ci-hexagon': {
node('CPU') {
timeout(time: max_time, unit: 'MINUTES') {
docker_init('none')
init_git()
build_image('ci_hexagon')
}
}
}
}
// // TODO: Once we are able to use the built images, enable this step
// // If the docker images changed, we need to run the image build before the lint
// // can run since it requires a base docker image. Most of the time the images
// // aren't built though so it's faster to use the same node that checks for
// // docker changes to run the lint in the usual case.
// stage('Sanity Check (re-run)') {
// timeout(time: max_time, unit: 'MINUTES') {
// node('CPU') {
// ws({{ m.per_exec_ws('tvm/sanity') }}) {
// init_git()
// sh (
// script: "${docker_run} ${ci_lint} ./tests/scripts/task_lint.sh",
// label: 'Run lint',
// )
// }
// }
// }
// }
}

// Run make. First try to do an incremental make from a previous workspace in hope to
// accelerate the compilation. If something is wrong, clean the workspace and then
// build from scratch.
def make(docker_type, path, make_flag) {
timeout(time: max_time, unit: 'MINUTES') {
try {
cmake_build(docker_type, path, make_flag)
// always run cpp test when build
} catch (hudson.AbortException ae) {
// script exited due to user abort, directly throw instead of retry
if (ae.getMessage().contains('script returned exit code 143')) {
throw ae
}
echo 'Incremental compilation failed. Fall back to build from scratch'
sh (
script: "${docker_run} ${docker_type} ./tests/scripts/task_clean.sh ${path}",
label: 'Clear old cmake workspace',
)
cmake_build(docker_type, path, make_flag)
}
},
{% endfor %}
)
}
}
3 changes: 3 additions & 0 deletions jenkins/Jenkinsfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ if (currentBuild.getBuildCauses().toString().contains('BranchIndexingCause')) {
{% set hexagon_api = ['build/hexagon_api_output',] %}
s3_prefix = "tvm-jenkins-artifacts-prod/tvm/${env.BRANCH_NAME}/${env.BUILD_NUMBER}"

{% set aws_default_region = "us-west-2" %}
{% set aws_ecr_url = "dkr.ecr." + aws_default_region + ".amazonaws.com" %}

// General note: Jenkins has limits on the size of a method (or top level code)
// that are pretty strict, so most usage of groovy methods in these templates
// are purely to satisfy the JVM
Expand Down
10 changes: 5 additions & 5 deletions jenkins/Lint.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ def lint() {
stage('Lint') {
parallel(
{% call m.sharded_lint_step(
name='Lint',
num_shards=2,
node='CPU-SMALL',
ws='tvm/lint',
docker_image='ci_lint',
name='Lint',
num_shards=2,
node='CPU-SMALL',
ws='tvm/lint',
docker_image='ci_lint',
)
%}
sh (
Expand Down
11 changes: 11 additions & 0 deletions jenkins/Prepare.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,17 @@ def docker_init(image) {
""",
label: 'Clean old Docker images',
)

if (image.contains("amazonaws.com")) {
// If this string is in the image name it's from ECR and needs to be pulled
// with the right credentials
ecr_pull(image)
} else {
sh(
script: "docker pull ${image}",
label: 'Pull docker image',
)
}
}

def should_skip_slow_tests(pr_number) {
Expand Down
Loading

0 comments on commit dd75257

Please sign in to comment.