Skip to content

Commit

Permalink
Merge pull request #4482 from eranco74/bootstrap-in-place
Browse files Browse the repository at this point in the history
Single Node deployment with bootstrap-in-place
  • Loading branch information
openshift-merge-robot authored Feb 12, 2021
2 parents 2eaa6e7 + 8e4a408 commit 1ad0158
Show file tree
Hide file tree
Showing 14 changed files with 530 additions and 70 deletions.
12 changes: 11 additions & 1 deletion cmd/openshift-install/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ var (
},
assets: targetassets.IgnitionConfigs,
}
// singleNodeIgnitionConfigTarget describes the "single-node-ignition-config"
// create sub-command; its assets come from targetassets.SingleNodeIgnitionConfig.
singleNodeIgnitionConfigTarget = target{
name: "Single Node Ignition Config",
command: &cobra.Command{
Use: "single-node-ignition-config",
Short: "Generates the bootstrap-in-place-for-live-iso Ignition Config asset",
// FIXME: add longer descriptions for our commands with examples for better UX.
// Long: "",
},
assets: targetassets.SingleNodeIgnitionConfig,
}

clusterTarget = target{
name: "Cluster",
Expand Down Expand Up @@ -141,7 +151,7 @@ var (
assets: targetassets.Cluster,
}

targets = []target{installConfigTarget, manifestsTarget, ignitionConfigsTarget, clusterTarget}
targets = []target{installConfigTarget, manifestsTarget, ignitionConfigsTarget, clusterTarget, singleNodeIgnitionConfigTarget}
)

func newCreateCmd() *cobra.Command {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash
set -euoE pipefail ## -E option will cause functions to inherit trap

echo "Running bootkube bootstrap-in-place post reboot"
# Exported so that every oc invocation in this script uses this kubeconfig.
export KUBECONFIG=/etc/kubernetes/bootstrap-secrets/kubeconfig

# Block until the API server answers an `oc get csr` request.
# Polls every 5 seconds and never times out.
wait_for_api() {
  while ! oc get csr &> /dev/null; do
    echo "Waiting for api ..."
    sleep 5
  done
}

# Restart kubelet twice:
#   1. once the kube-apiserver-operator pod reports Ready, after reloading
#      systemd unit files;
#   2. again once the kube-apiserver static pod manifest no longer mentions
#      bootstrap-kube-apiserver.
restart_kubelet() {
  echo "Restarting kubelet"
  # The Ready condition is counted through grep -c so that an oc failure
  # simply yields "0" and the loop keeps polling.
  while ! [ "$(oc get pod -n openshift-kube-apiserver-operator --selector='app=kube-apiserver-operator' -o jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].status}' | grep -c "True")" -eq 1 ]; do
    echo "Waiting for kube-apiserver-operator ready condition to be True"
    sleep 10
  done

  # daemon-reload is required because /etc/systemd/system/kubelet.service.d/20-nodenet.conf is added after kubelet started
  systemctl daemon-reload
  systemctl restart kubelet

  # Keep polling for as long as the manifest on disk is still the bootstrap one.
  until ! grep bootstrap-kube-apiserver /etc/kubernetes/manifests/kube-apiserver-pod.yaml; do
    echo "Waiting for kube-apiserver to apply the new static pod configuration"
    sleep 10
  done
  systemctl restart kubelet
}

# Approve pending CSRs every 30 seconds until the first node labelled
# node-role.kubernetes.io/master reports a Ready condition of True.
approve_csr() {
  echo "Approving csrs ..."
  while ! [ "$(oc get nodes --selector='node-role.kubernetes.io/master' -o jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].status}' | grep -c "True")" -eq 1 ]; do
    echo "Approving csrs ..."
    # Only CSRs with an empty status are listed; approval is best effort,
    # so failures are silenced and ignored.
    oc get csr -o go-template='{{range .items}}{{if not .status}}{{.metadata.name}}{{"\n"}}{{end}}{{end}}' \
      | xargs --no-run-if-empty oc adm certificate approve &> /dev/null \
      || true
    sleep 30
  done
}

# Block until the first ClusterVersion object reports an Available
# condition of "True". Polls every 30 seconds and never times out.
wait_for_cvo() {
  echo "Waiting for cvo"
  while [ "$(oc get clusterversion -o jsonpath='{.items[0].status.conditions[?(@.type=="Available")].status}')" != "True" ]; do
    echo "Still waiting for cvo ..."
    sleep 30
  done
}

# Remove bootstrap leftovers from the node and disable bootkube.service.
clean() {
  local leftover

  # Everything matching /etc/kubernetes/bootstrap-* goes, but only when the
  # bootstrap-secrets directory is still present.
  if [ -d "/etc/kubernetes/bootstrap-secrets" ]; then
    rm -rf /etc/kubernetes/bootstrap-*
  fi

  # Gather scripts and the bootstrap log bundle.
  for leftover in \
    /usr/local/bin/installer-gather.sh \
    /usr/local/bin/installer-masters-gather.sh \
    /var/log/log-bundle-bootstrap.tar.gz; do
    rm -rf "${leftover}"
  done

  systemctl disable bootkube.service
}

# Post-reboot sequence; every step blocks until its own condition is met.
# 1. The API answers `oc get csr`.
wait_for_api
# 2. Pending CSRs are approved until the master node reports Ready.
approve_csr
# 3. kubelet is restarted around the kube-apiserver static pod hand-over.
restart_kubelet
# 4. The ClusterVersion reports Available.
wait_for_cvo
# 5. Bootstrap files are removed and bootkube.service is disabled.
clean
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Fedora CoreOS Config that layers the bootstrap control plane on top of the
# master ignition.
# NOTE(review): presumably this is the bootstrap-in-place/master-update.fcc
# passed to `bootstrap-in-place --input`, with `local:` paths resolved against
# its --asset-dir -- confirm.
variant: fcos
version: 1.1.0
ignition:
config:
merge:
# Master ignition downloaded from the MCS by bootstrap-in-place.sh.
- local: original-master.ign
storage:
# Whole directory trees copied onto the installed node.
trees:
- local: kubernetes/bootstrap-configs
path: /etc/kubernetes/bootstrap-configs
- local: tls/
path: /etc/kubernetes/bootstrap-secrets
- local: etcd-bootstrap/bootstrap-manifests/secrets/
path: /etc/kubernetes/static-pod-resources/etcd-member
- local: etcd-data
path: /var/lib/etcd
# Individual files: kubeconfig, etcd CA, static pod manifests, the post-reboot
# script, the bootstrap log bundle and the gather scripts.
files:
# Read by bootstrap-in-place-post-reboot.sh through KUBECONFIG.
- path: /etc/kubernetes/bootstrap-secrets/kubeconfig
contents:
local: auth/kubeconfig-loopback
- path: /etc/kubernetes/static-pod-resources/etcd-member/ca.crt
contents:
local: tls/etcd-ca-bundle.crt
- path: /etc/kubernetes/manifests/etcd-pod.yaml
contents:
local: etcd-bootstrap/bootstrap-manifests/etcd-member-pod.yaml
- path: /etc/kubernetes/manifests/kube-apiserver-pod.yaml
contents:
local: bootstrap-manifests/kube-apiserver-pod.yaml
- path: /etc/kubernetes/manifests/kube-controller-manager-pod.yaml
contents:
local: bootstrap-manifests/kube-controller-manager-pod.yaml
- path: /etc/kubernetes/manifests/kube-scheduler-pod.yaml
contents:
local: bootstrap-manifests/kube-scheduler-pod.yaml
- path: /usr/local/bin/bootstrap-in-place-post-reboot.sh
contents:
local: bootstrap-in-place/bootstrap-in-place-post-reboot.sh
mode: 0555
# The three files below are deleted again by the post-reboot script's clean step.
- path: /var/log/log-bundle-bootstrap.tar.gz
contents:
local: log-bundle-bootstrap.tar.gz
- path: /usr/local/bin/installer-masters-gather.sh
contents:
local: bin/installer-masters-gather.sh
mode: 0555
- path: /usr/local/bin/installer-gather.sh
contents:
local: bin/installer-gather.sh
mode: 0555
systemd:
units:
- name: bootkube.service
enabled: true
contents: |
[Unit]
Description=Bootkube - bootstrap in place post reboot
Wants=kubelet.service
After=kubelet.service
ConditionPathExists=/etc/kubernetes/bootstrap-secrets/kubeconfig
[Service]
Type=oneshot
ExecStart=/usr/local/bin/bootstrap-in-place-post-reboot.sh
RestartSec=5s
[Install]
WantedBy=multi-user.target
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
set -euoE pipefail ## -E option will cause functions to inherit trap

# This script is executed by bootkube.sh when installing single node with bootstrap in place
CLUSTER_BOOTSTRAP_IMAGE=$1


# Run a container through podman, forwarding every argument unchanged.
bootkube_podman_run() {
  # Containers always join the host network so they cannot clash with IP
  # ranges used by the end-user infrastructure.
  podman run --quiet --net=host "$@"
}

# Stage 1: stop the etcd static pod. The stop-etcd.done marker in the working
# directory makes the stage a no-op on later runs.
if [ ! -f stop-etcd.done ]; then
  echo "Stop etcd static pod by moving the manifest"
  # A failed move is only reported, never fatal.
  if ! mv /etc/kubernetes/manifests/etcd-member-pod.yaml /etc/kubernetes; then
    echo "already moved etcd-member-pod.yaml"
  fi

  # Poll for as long as crictl still lists a container matching "etcd".
  while crictl ps | grep etcd; do
    echo "Waiting for etcd to go down"
    sleep 10
  done

  touch stop-etcd.done
fi

# Stage 2: build the final master ignition (/assets/master.ign inside the
# container, i.e. master.ign in the working directory). The
# master-ignition.done marker makes the stage a no-op on later runs.
if [ ! -f master-ignition.done ]; then
  echo "Creating master ignition and writing it to disk"
  # Get the master ignition from MCS.
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error page as original-master.ign; set -e then aborts this run before a
  # bogus ignition can be merged and written to disk.
  curl --fail --header 'Accept:application/vnd.coreos.ignition+json;version=3.1.0' \
    http://localhost:22624/config/master -o /opt/openshift/original-master.ign

  GATHER_ID="bootstrap"
  GATHER_TAR_FILE=log-bundle-${GATHER_ID}.tar.gz

  echo "Gathering installer bootstrap logs"
  TAR_FILE=${GATHER_TAR_FILE} /usr/local/bin/installer-gather.sh --id "${GATHER_ID}"

  echo "Adding bootstrap control plane and bootstrap installer-gather bundle to master ignition"
  # The working directory is the asset dir; host directories are mounted
  # below it so the container can read them as /assets/... paths.
  bootkube_podman_run \
    --rm \
    --privileged \
    --volume "$PWD:/assets:z" \
    --volume "/usr/local/bin/:/assets/bin" \
    --volume "/var/lib/etcd/:/assets/etcd-data" \
    --volume "/etc/kubernetes:/assets/kubernetes" \
    "${CLUSTER_BOOTSTRAP_IMAGE}" \
    bootstrap-in-place \
    --asset-dir /assets \
    --input /assets/bootstrap-in-place/master-update.fcc \
    --output /assets/master.ign

  touch master-ignition.done
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -euoE pipefail ## -E option will cause functions to inherit trap

# This script is executed by install-to-disk service when installing single node with bootstrap in place

# Block until the .bootkube.done marker that bootkube.sh creates is present.
echo "Waiting for /opt/openshift/.bootkube.done"
until [ -f /opt/openshift/.bootkube.done ]; do
  sleep 5
done

# coreos-installer.done (in the working directory) keeps a re-run of this
# script from writing the disk a second time.
if [ ! -f coreos-installer.done ]; then
  # Write image + ignition to disk
  echo "Executing coreos-installer with the following options: install -i /opt/openshift/master.ign {{.BootstrapInPlace.InstallationDisk}}"
  coreos-installer install -i /opt/openshift/master.ign {{.BootstrapInPlace.InstallationDisk}}

  touch coreos-installer.done
fi

echo "Going to reboot"
# Reboot is scheduled one minute out, leaving time to write the final marker.
shutdown -r +1 "Bootstrap completed, server is going to reboot."
touch /opt/openshift/.install-to-disk.done
echo "Done"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Runs install-to-disk.sh, which waits for bootkube to finish, writes the
# image and master ignition to disk and schedules a reboot.
[Unit]
Description=Install to disk
# Pulled in together with, and ordered after, bootkube.service.
Requires=bootkube.service
Wants=bootkube.service
After=bootkube.service
# Skipped once install-to-disk.sh has created its completion marker.
ConditionPathExists=!/opt/openshift/.install-to-disk.done

[Service]
# install-to-disk.sh keeps its coreos-installer.done marker relative to this directory.
WorkingDirectory=/opt/openshift
ExecStart=/usr/local/bin/install-to-disk.sh

# A failed run is retried after 5 seconds.
Restart=on-failure
RestartSec=5s
85 changes: 55 additions & 30 deletions data/data/bootstrap/files/usr/local/bin/bootkube.sh.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ set -euoE pipefail ## -E option will cause functions to inherit trap

mkdir --parents /etc/kubernetes/{manifests,bootstrap-configs,bootstrap-manifests}

{{- if .BootstrapInPlace }}
BOOTSTRAP_INPLACE=true
{{ else }}
BOOTSTRAP_INPLACE=false
{{ end -}}

ETCD_ENDPOINTS=

bootkube_podman_run() {
Expand All @@ -13,6 +19,26 @@ bootkube_podman_run() {
podman run --quiet --net=host "${@}"
}

# Block until `etcdctl endpoint health` succeeds against ETCD_ENDPOINTS.
# Globals: MACHINE_CONFIG_ETCD_IMAGE (read), ETCD_ENDPOINTS (read)
# Retries every 5 seconds and never times out.
wait_for_etcd_cluster() {
  # Full argument list: container options, the etcd image, then the etcdctl
  # flags and sub-command.
  local -a health_check=(
    --rm
    --name etcdctl
    --env ETCDCTL_API=3
    --volume /opt/openshift/tls:/opt/openshift/tls:ro,z
    --entrypoint etcdctl
    "${MACHINE_CONFIG_ETCD_IMAGE}"
    --dial-timeout=10m
    --cacert=/opt/openshift/tls/etcd-ca-bundle.crt
    --cert=/opt/openshift/tls/etcd-client.crt
    --key=/opt/openshift/tls/etcd-client.key
    --endpoints="${ETCD_ENDPOINTS}"
    endpoint health
  )

  while ! bootkube_podman_run "${health_check[@]}"; do
    echo "etcdctl failed. Retrying in 5 seconds..."
    sleep 5
  done
}

MACHINE_CONFIG_OPERATOR_IMAGE=$(image_for machine-config-operator)
MACHINE_CONFIG_OSCONTENT=$(image_for machine-os-content)
MACHINE_CONFIG_ETCD_IMAGE=$(image_for etcd)
Expand Down Expand Up @@ -317,24 +343,18 @@ then
touch cco-bootstrap.done
fi

# Wait for the etcd cluster to come up.
until bootkube_podman_run \
--rm \
--name etcdctl \
--env ETCDCTL_API=3 \
--volume /opt/openshift/tls:/opt/openshift/tls:ro,z \
--entrypoint etcdctl \
"${MACHINE_CONFIG_ETCD_IMAGE}" \
--dial-timeout=10m \
--cacert=/opt/openshift/tls/etcd-ca-bundle.crt \
--cert=/opt/openshift/tls/etcd-client.crt \
--key=/opt/openshift/tls/etcd-client.key \
--endpoints="${ETCD_ENDPOINTS}" \
endpoint health
do
echo "etcdctl failed. Retrying in 5 seconds..."
sleep 5
done
# in case of single node, if we removed etcd, there is no point to wait for it on restart
if [ ! -f stop-etcd.done ]
then
# Wait for the etcd cluster to come up.
wait_for_etcd_cluster
fi

REQUIRED_PODS="openshift-kube-apiserver/kube-apiserver,openshift-kube-scheduler/openshift-kube-scheduler,openshift-kube-controller-manager/kube-controller-manager,openshift-cluster-version/cluster-version-operator"
if [ "$BOOTSTRAP_INPLACE" = true ]
then
REQUIRED_PODS=""
fi

echo "Starting cluster-bootstrap..."

Expand All @@ -345,23 +365,28 @@ then
--volume "$PWD:/assets:z" \
--volume /etc/kubernetes:/etc/kubernetes:z \
"${CLUSTER_BOOTSTRAP_IMAGE}" \
start --tear-down-early=false --asset-dir=/assets --required-pods="openshift-kube-apiserver/kube-apiserver,openshift-kube-scheduler/openshift-kube-scheduler,openshift-kube-controller-manager/kube-controller-manager,openshift-cluster-version/cluster-version-operator"
start --tear-down-early=false --asset-dir=/assets --required-pods="${REQUIRED_PODS}"

touch cb-bootstrap.done
fi

rm --force /etc/kubernetes/manifests/machineconfigoperator-bootstrap-pod.yaml

if [ ! -z "$CLUSTER_ETCD_OPERATOR_IMAGE" ]
if [ "$BOOTSTRAP_INPLACE" = true ]
then
echo "Waiting for CEO to finish..."
bootkube_podman_run \
--volume "$PWD:/assets:z" \
"${CLUSTER_ETCD_OPERATOR_IMAGE}" \
/usr/bin/cluster-etcd-operator \
wait-for-ceo \
--kubeconfig /assets/auth/kubeconfig
. /usr/local/bin/bootstrap-in-place.sh "${CLUSTER_BOOTSTRAP_IMAGE}"
else
rm --force /etc/kubernetes/manifests/machineconfigoperator-bootstrap-pod.yaml

if [ ! -z "$CLUSTER_ETCD_OPERATOR_IMAGE" ]
then
echo "Waiting for CEO to finish..."
bootkube_podman_run \
--volume "$PWD:/assets:z" \
"${CLUSTER_ETCD_OPERATOR_IMAGE}" \
/usr/bin/cluster-etcd-operator \
wait-for-ceo \
--kubeconfig /assets/auth/kubeconfig
fi
fi

# Workaround for https://github.com/opencontainers/runc/pull/1807
touch /opt/openshift/.bootkube.done
echo "bootkube.service complete"
Loading

0 comments on commit 1ad0158

Please sign in to comment.