Skip to content

Commit

Permalink
Add GH workflow for automatically updating nvidia device plugin stati…
Browse files Browse the repository at this point in the history
…c manifest (#7898)

* Add GH workflow for automatically updating nvidia device plugin static manifest

* update PR body

* fix unit tests

* updates userdocs
  • Loading branch information
TiberiuGC authored Jul 29, 2024
1 parent 02c41d5 commit 7f91b9d
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 43 deletions.
84 changes: 45 additions & 39 deletions .github/workflows/update-generated.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Update generated files
on:
workflow_dispatch: {}
schedule:
- cron: "0 5 * * Thu"
- cron: "0 5 * * Thu"

permissions:
id-token: write
Expand All @@ -15,47 +15,53 @@ jobs:
strategy:
fail-fast: false
matrix:
resource: ["coredns", "aws-node"]
resource: ["coredns", "aws-node", "nvidia-device-plugin"]
name: Update ${{ matrix.resource }} and open PR
runs-on: ubuntu-latest
container: public.ecr.aws/eksctl/eksctl-build:833f4464e865a6398788bf6cbc5447967b8974b7
env:
GOPRIVATE: ""
steps:
- name: Checkout
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 #v4.1.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
fetch-depth: 0
- name: Configure AWS credentials for coredns update
if: ${{ matrix.resource == 'coredns' }}
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: us-west-2
role-duration-seconds: 900
role-session-name: eksctl-update-coredns-assets
role-to-assume: ${{ secrets.UPDATE_COREDNS_ROLE_ARN }}
- name: Setup identity as eksctl-bot
uses: ./.github/actions/setup-identity
with:
token: "${{ secrets.EKSCTLBOT_TOKEN }}"
- name: Cache go-build and mod
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 #v4.0.2
with:
path: |
~/.cache/go-build/
~/go/pkg/mod/
key: go-${{ hashFiles('go.sum') }}
restore-keys: |
go-
- name: Update ${{ matrix.resource }}
run: make update-${{ matrix.resource }}
- name: Upsert pull request
uses: peter-evans/create-pull-request@70a41aba780001da0a30141984ae2a0c95d8704e #v6.0.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
commit-message: update ${{ matrix.resource }}
committer: eksctl-bot <eksctl-bot@users.noreply.github.com>
title: 'Update ${{ matrix.resource }}'
branch: update-${{ matrix.resource }}
labels: area/tech-debt
# Steps executed once per matrix.resource entry.
- name: Checkout
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 #v4.1.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
fetch-depth: 0
# AWS credentials are only configured for the coredns matrix entry; the other
# resources skip this step.
- name: Configure AWS credentials for coredns update
if: ${{ matrix.resource == 'coredns' }}
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: us-west-2
role-duration-seconds: 900
role-session-name: eksctl-update-coredns-assets
role-to-assume: ${{ secrets.UPDATE_COREDNS_ROLE_ARN }}
- name: Setup identity as eksctl-bot
uses: ./.github/actions/setup-identity
with:
token: "${{ secrets.EKSCTLBOT_TOKEN }}"
# Cache key is derived from go.sum; restore-keys falls back to any "go-" cache.
- name: Cache go-build and mod
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 #v4.0.2
with:
path: |
~/.cache/go-build/
~/go/pkg/mod/
key: go-${{ hashFiles('go.sum') }}
restore-keys: |
go-
# Invokes the make target named after the matrix entry,
# e.g. `make update-nvidia-device-plugin`.
- name: Update ${{ matrix.resource }}
run: make update-${{ matrix.resource }}
# env.LATEST_RELEASE_TAG is written to $GITHUB_ENV by the update scripts as
# " to <tag>" (leading " to " included), so it is appended directly after the
# resource name with no separator here.
# NOTE(review): presumably expands to an empty string for resources whose
# update script does not export it -- confirm.
- name: Upsert pull request
uses: peter-evans/create-pull-request@70a41aba780001da0a30141984ae2a0c95d8704e #v6.0.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
commit-message: update ${{ matrix.resource }}${{ env.LATEST_RELEASE_TAG }}
committer: eksctl-bot <eksctl-bot@users.noreply.github.com>
title: 'Update ${{ matrix.resource }}${{ env.LATEST_RELEASE_TAG }}'
branch: update-${{ matrix.resource }}
labels: area/tech-debt
body: |
Auto-generated by [eksctl Update Generated Files GitHub workflow][1]
[1]: https://github.com/eksctl-io/eksctl/blob/main/.github/workflows/update-generated.yaml
Please manually test before approving and merging.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ generate-all: generate-always $(conditionally_generated_files) ## Re-generate al
check-all-generated-files-up-to-date: generate-all ## Run the generate all command and verify there is no new diff
git diff --quiet -- $(conditionally_generated_files) || (git --no-pager diff $(conditionally_generated_files); echo "HINT: to fix this, run 'git commit $(conditionally_generated_files) --message \"Update generated files\"'"; exit 1)

# Runs the script that downloads the latest NVIDIA device plugin static
# manifest into pkg/addons/assets. The "Update generated files" workflow calls
# this target as `make update-nvidia-device-plugin`.
.PHONY: update-nvidia-device-plugin
update-nvidia-device-plugin: ## fetch the latest static manifest
pkg/addons/assets/scripts/update_nvidia_device_plugin.sh

.PHONY: update-aws-node
update-aws-node: ## Re-download the aws-node manifests from AWS
Expand Down
33 changes: 33 additions & 0 deletions pkg/addons/assets/scripts/update_nvidia_device_plugin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Downloads the static manifest of the latest NVIDIA k8s-device-plugin release
# and stores it as pkg/addons/assets/nvidia-device-plugin.yaml.
#
# The destination path is relative, so run this from the repository root
# (e.g. via `make update-nvidia-device-plugin`). Requires curl and jq.
# Exits with status 1 if the release tag cannot be determined or the
# download fails.

# Prints the tag name of the latest NVIDIA/k8s-device-plugin GitHub release,
# or nothing at all if it cannot be determined.
#   -f          makes curl fail on HTTP errors (404, rate limiting, ...) instead
#               of piping the error document into jq.
#   // empty    makes jq print nothing, rather than the literal string "null",
#               when the response carries no tag_name field.
get_latest_release_tag() {
  curl -fsSL https://api.github.com/repos/NVIDIA/k8s-device-plugin/releases/latest \
    | jq -r '.tag_name // empty'
}

latest_release_tag=$(get_latest_release_tag)

# Check if the latest release tag was found
if [ -z "$latest_release_tag" ]; then
  echo "Could not find the latest release tag." >&2
  exit 1
fi

# If running in GitHub Actions, export the release tag for use in the workflow.
# The value deliberately starts with " to ": the workflow appends
# LATEST_RELEASE_TAG directly after the resource name in the PR title and
# commit message, producing e.g. "Update nvidia-device-plugin to <tag>".
if [ "$GITHUB_ACTIONS" = "true" ]; then
  echo "LATEST_RELEASE_TAG= to $latest_release_tag" >> "$GITHUB_ENV"
else
  echo "Found the latest release tag: $latest_release_tag"
fi

assets_addons_dir="pkg/addons/assets"
manifest_url="https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/$latest_release_tag/deployments/static/nvidia-device-plugin.yml"

# Test curl's exit status directly. With -f an HTTP error (such as a 404 for a
# tag that has no static manifest at this path) is reported as a failure
# instead of being saved as if it were the manifest.
if curl -fsSL "$manifest_url" -o "$assets_addons_dir/nvidia-device-plugin.yaml"; then
  echo "Downloaded the latest NVIDIA device plugin manifest to $assets_addons_dir/nvidia-device-plugin.yaml"
else
  echo "Failed to download the NVIDIA device plugin manifest." >&2
  exit 1
fi
21 changes: 20 additions & 1 deletion pkg/addons/default/scripts/update_aws_node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,31 @@ get_latest_release_tag() {

latest_release_tag=$(get_latest_release_tag)

# Check if the latest release tag was found
if [ -z "$latest_release_tag" ]; then
echo "Could not find the latest release tag."
exit 1
fi

# If running in GitHub Actions, export the release tag for use in the workflow
if [ "$GITHUB_ACTIONS" = "true" ]; then
echo "LATEST_RELEASE_TAG= to $latest_release_tag" >> $GITHUB_ENV
else
echo "Found the latest release tag: $latest_release_tag"
fi

default_addons_dir="pkg/addons/default"

# Download the latest aws-k8s-cni.yaml file
curl -sL "$base_url$latest_release_tag/config/master/aws-k8s-cni.yaml?raw=1" --output "$default_addons_dir/assets/aws-node.yaml"

echo "found latest release tag:" $latest_release_tag
# Check if the download was successful
if [ $? -eq 0 ]; then
echo "Downloaded the latest AWS Node manifest to $default_addons_dir/assets/aws-node.yaml"
else
echo "Failed to download the latest AWS Node manifest."
exit 1
fi

# Update the unit test file
sed -i "s/expectedVersion = \"\(.*\)\"/expectedVersion = \"$latest_release_tag\"/g" "$default_addons_dir/aws_node_test.go"
6 changes: 4 additions & 2 deletions pkg/printers/testdata/jsontest_2clusters.golden
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
"RoleArn": null,
"Status": "ACTIVE",
"Tags": null,
"Version": null
"Version": null,
"UpgradePolicy": null
},
{
"Id": null,
Expand Down Expand Up @@ -73,6 +74,7 @@
"RoleArn": null,
"Status": "ACTIVE",
"Tags": null,
"Version": null
"Version": null,
"UpgradePolicy": null
}
]
3 changes: 2 additions & 1 deletion pkg/printers/testdata/jsontest_single.golden
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"RoleArn": null,
"Status": "ACTIVE",
"Tags": null,
"Version": null
"Version": null,
"UpgradePolicy": null
}
]
2 changes: 2 additions & 0 deletions pkg/printers/testdata/yamltest_2clusters.golden
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
Status: ACTIVE
Tags: null
Version: null
UpgradePolicy: null
- Id: null
Arn: arn-87654321
CertificateAuthority: null
Expand Down Expand Up @@ -62,3 +63,4 @@
Status: ACTIVE
Tags: null
Version: null
UpgradePolicy: null
1 change: 1 addition & 0 deletions pkg/printers/testdata/yamltest_single.golden
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@
Status: ACTIVE
Tags: null
Version: null
UpgradePolicy: null
10 changes: 10 additions & 0 deletions userdocs/src/usage/gpu-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,17 @@ use `--install-nvidia-plugin=false` with the create command. For example:

```
eksctl create cluster --node-type=p2.xlarge --install-nvidia-plugin=false
```

and, for versions 0.15.0 and above,

```
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/<VERSION>/deployments/static/nvidia-device-plugin.yml
```

or, for older versions,

```
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/<VERSION>/nvidia-device-plugin.yml
```

Expand Down

0 comments on commit 7f91b9d

Please sign in to comment.