diff --git a/Dockerfile b/Dockerfile index 96c82b0..62a4604 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM fedora -RUN dnf install -y git findutils systemd grub2-tools-minimal +RUN dnf install -y git findutils systemd grub2-tools-minimal util-linux jq RUN git clone https://github.com/bats-core/bats-core.git WORKDIR /bats-core @@ -9,9 +9,10 @@ COPY ./usr/libexec/greenboot /usr/libexec/greenboot COPY ./usr/lib/greenboot/check /usr/lib/greenboot/check RUN mkdir -p /etc/greenboot/{green.d,red.d,check} RUN mkdir /etc/greenboot/check/{required.d,wanted.d} +COPY ./etc/greenboot/greenboot.conf /etc/greenboot/greenboot.conf -WORKDIR /testing -COPY ./tests . +COPY ./tests /testing COPY ./tests/testing_files/fedora_iot.conf /etc/ostree/remotes.d/fedora_iot.conf +WORKDIR /testing ENTRYPOINT [ "/bin/bash", "launch_all_tests.sh" ] diff --git a/README.md b/README.md index 30d4cac..62a815d 100644 --- a/README.md +++ b/README.md @@ -32,12 +32,9 @@ systemctl reboot ### Configuration At the moment, it is possible to customize the following parameters via environment variables. These environment variables can be described as well in the config file `/etc/greenboot/greenboot.conf`: -- **GREENBOOT_MAX_BOOT_ATTEMPTS**. Maximum number of boot attempts. - -#### Sample `etc/greenboot/greenboot.conf` file -``` bash -GREENBOOT_MAX_BOOT_ATTEMPTS=2 -``` +- **GREENBOOT_MAX_BOOT_ATTEMPTS**: Maximum number of boot attempts before declaring the deployment as problematic and rolling back to the previous one. +- **GREENBOOT_WATCHDOG_CHECK_ENABLED**: Enables/disables *Check if current boot has been triggered by hardware watchdog* health check. More info on [Health checks included with subpackage greenboot-default-health-checks](#health-checks-included-with-subpackage-greenboot\-default\-health\-checks) section. +- **GREENBOOT_WATCHDOG_GRACE_PERIOD**: Number of hours after an upgrade that we consider the new deployment as culprit of reboot. 
### Health checks with bash scripts @@ -74,6 +71,7 @@ These health checks are available in `/usr/lib/greenboot/check`, a read-only dir - **Check if repositories URLs are still DNS solvable**: This script is under `/usr/lib/greenboot/check/required.d/01_repository_dns_check.sh` and makes sure that DNS queries to repository URLs are still available. - **Check if update platforms are still reachable**: This script is under `/usr/lib/greenboot/check/wanted.d/01_update_platform_check.sh` and tries to connect and get a 2XX or 3XX HTTP code from the update platforms defined in `/etc/ostree/remotes.d`. +- **Check if current boot has been triggered by hardware watchdog**: This script is under `/usr/lib/greenboot/check/required.d/02_watchdog.sh` and checks whether the current boot has been watchdog-triggered or not. If it has, but the reboot has occurred after a certain grace period (default of 24 hours, configurable via `GREENBOOT_WATCHDOG_GRACE_PERIOD=number_of_hours` in `/etc/greenboot/greenboot.conf`), Greenboot won't mark the current boot as red and won't roll back to the previous deployment. If it has occurred within the grace period, at the moment the current boot will be marked as red, but Greenboot won't roll back to the previous deployment. It is enabled by default but it can be disabled by setting `GREENBOOT_WATCHDOG_CHECK_ENABLED` to `false` in `/etc/greenboot/greenboot.conf`. ### Health Checks with systemd services Overall boot success is measured against `boot-complete.target`. diff --git a/etc/greenboot/greenboot.conf b/etc/greenboot/greenboot.conf new file mode 100644 index 0000000..3cae5ff --- /dev/null +++ b/etc/greenboot/greenboot.conf @@ -0,0 +1,15 @@ +# Greenboot configuration file + +## Generic +# GREENBOOT_MAX_BOOT_ATTEMPTS=3 + +## Watchdog +### This variable controls whether the watchdog health check is enabled. +### This value can be "true, TRUE, True..." as it will be lowercased. +### Set it to anything else to disable this check. 
+GREENBOOT_WATCHDOG_CHECK_ENABLED=true + +### This variable is the number of hours after an upgrade that we consider +### the new deployment as culprit of reboot. +### It has to be a positive integer. Defaults to 24 (hours). +# GREENBOOT_WATCHDOG_GRACE_PERIOD=24 \ No newline at end of file diff --git a/greenboot.spec b/greenboot.spec index ade0798..4a3cf8d 100644 --- a/greenboot.spec +++ b/greenboot.spec @@ -39,6 +39,8 @@ Obsoletes: greenboot-rpm-ostree-grub2 <= 0.12.0 %package default-health-checks Summary: Series of optional and curated health checks Requires: %{name} = %{version}-%{release} +Requires: util-linux +Requires: jq Provides: greenboot-update-platforms-check Obsoletes: greenboot-update-platforms-check <= 0.12.0 @@ -157,6 +159,7 @@ install -DpZm 0755 usr/lib/greenboot/check/wanted.d/* %{buildroot}%{_prefix}/lib %{_prefix}/lib/%{name}/check/required.d/01_repository_dns_check.sh %{_prefix}/lib/%{name}/check/wanted.d/01_update_platforms_check.sh %{_unitdir}/greenboot-healthcheck.service.d/10-network-online.conf +%{_prefix}/lib/%{name}/check/required.d/02_watchdog.sh %changelog * Wed Nov 10 2021 Peter Robinson - 0.13.1-1 diff --git a/tests/check_watchdog_support.bats b/tests/check_watchdog_support.bats new file mode 100644 index 0000000..2e6da95 --- /dev/null +++ b/tests/check_watchdog_support.bats @@ -0,0 +1,10 @@ +load common.bash + +function setup() { # Source the check with --source-only so only its functions are loaded + source "$GREENBOOT_DEFAULT_CHECK_PATH/required.d/02_watchdog.sh" --source-only +} + +@test "Ensure watchdog check is working" { + run check_if_current_boot_is_wd_triggered + [ "$status" -eq 0 ] +} diff --git a/tests/greenboot_check.bats b/tests/greenboot_check.bats index 20e5eb2..30becad 100644 --- a/tests/greenboot_check.bats +++ b/tests/greenboot_check.bats @@ -1,6 +1,11 @@ load common.bash function setup() { + # 02_watchdog.sh can't be checked within the container at the moment + # due to rpm-ostree, hence moving it out of the required directory + # for this test + mv 
$GREENBOOT_DEFAULT_CHECK_PATH/required.d/02_watchdog.sh /tmp/02_watchdog.sh + # This checks that the /etc/greenboot/check path works as well # as the /usr/lib/greenboot/check one mv $GREENBOOT_DEFAULT_CHECK_PATH/wanted.d/* $GREENBOOT_ETC_CHECK_PATH/wanted.d/ @@ -19,5 +24,6 @@ function setup() { } function teardown() { + mv /tmp/02_watchdog.sh $GREENBOOT_DEFAULT_CHECK_PATH/required.d/02_watchdog.sh mv $GREENBOOT_ETC_CHECK_PATH/wanted.d/* $GREENBOOT_DEFAULT_CHECK_PATH/wanted.d/ } diff --git a/tests/greenboot_check_fail_required.bats b/tests/greenboot_check_fail_required.bats index df24639..da23597 100644 --- a/tests/greenboot_check_fail_required.bats +++ b/tests/greenboot_check_fail_required.bats @@ -7,7 +7,7 @@ function setup() { @test "Test greenboot check with required scripts failing" { run $GREENBOOT_BIN_PATH check - [ "$status" -eq 1 ] + [ "$status" -ne 0 ] } @test "Test greenboot runs all required scripts even if one fails" { diff --git a/tests/greenboot_check_fail_wanted.bats b/tests/greenboot_check_fail_wanted.bats index cf08b56..03e2eae 100644 --- a/tests/greenboot_check_fail_wanted.bats +++ b/tests/greenboot_check_fail_wanted.bats @@ -1,6 +1,11 @@ load common.bash function setup() { + # 02_watchdog.sh can't be checked within the container at the moment + # due to rpm-ostree, hence moving it out of the required directory + # for this test + mv $GREENBOOT_DEFAULT_CHECK_PATH/required.d/02_watchdog.sh /tmp/02_watchdog.sh + cp testing_files/10_failing_check.sh $GREENBOOT_DEFAULT_CHECK_PATH/wanted.d/ } @@ -11,4 +16,5 @@ function setup() { function teardown() { rm $GREENBOOT_DEFAULT_CHECK_PATH/wanted.d/10_failing_check.sh + mv /tmp/02_watchdog.sh $GREENBOOT_DEFAULT_CHECK_PATH/required.d/02_watchdog.sh } diff --git a/usr/lib/greenboot/check/required.d/02_watchdog.sh b/usr/lib/greenboot/check/required.d/02_watchdog.sh new file mode 100644 index 0000000..101750d --- /dev/null +++ b/usr/lib/greenboot/check/required.d/02_watchdog.sh @@ -0,0 +1,64 @@ +#!/bin/bash +set 
-eo pipefail + +source_configuration_file() { # Load /etc/greenboot/greenboot.conf when that file exists + GREENBOOT_CONFIGURATION_FILE=/etc/greenboot/greenboot.conf + if test -f "$GREENBOOT_CONFIGURATION_FILE"; then + source "$GREENBOOT_CONFIGURATION_FILE" + fi +} + +set_grace_period() { # Set GRACE_PERIOD (hours): the configured value if it is a whole number, else the default + DEFAULT_GRACE_PERIOD=24 # default to 24 hours + + if [[ "$GREENBOOT_WATCHDOG_GRACE_PERIOD" =~ ^[0-9]+$ ]]; then + GRACE_PERIOD=$GREENBOOT_WATCHDOG_GRACE_PERIOD + else + GRACE_PERIOD=$DEFAULT_GRACE_PERIOD + fi +} + +check_if_there_is_a_watchdog() { # Succeeds only when wdctl itself exits successfully + if wdctl 2>/dev/null ; then + return 0 + else + return 1 + fi +} + +check_if_current_boot_is_wd_triggered() { # Returns 1 when wdctl lists a flag line ending in 1, 0 otherwise + if check_if_there_is_a_watchdog ; then + WDCTL_OUTPUT=$(wdctl --flags-only --noheadings | grep -c '1$' || true) + if [ "$WDCTL_OUTPUT" -gt 0 ]; then + # This means the boot was watchdog triggered + # TO-DO: maybe do a rollback here? + echo "Watchdog triggered after recent update" + return 1 # return, not exit, so a shell that sourced this file is not terminated + fi + else + # There's no watchdog, so nothing to be done here + return 0 + fi +} + +# This is in order to test check_if_current_boot_is_wd_triggered +# function within a container +if [ "${1}" != "--source-only" ]; then + source_configuration_file + if [ "${GREENBOOT_WATCHDOG_CHECK_ENABLED,,}" != "true" ]; then + echo "Watchdog check is disabled" + exit 0 + fi + + set_grace_period + + SECONDS_IN_AN_HOUR=$((60 * 60)) + LAST_DEPLOYMENT_TIMESTAMP=$(rpm-ostree status --json | jq '.deployments[0].timestamp') # filter quoted so the shell cannot glob-expand [0] + + HOURS_SINCE_LAST_UPDATE=$((($(date +%s) - "$LAST_DEPLOYMENT_TIMESTAMP") / SECONDS_IN_AN_HOUR)) + if [ "$HOURS_SINCE_LAST_UPDATE" -lt "$GRACE_PERIOD" ]; then + check_if_current_boot_is_wd_triggered + else + exit 0 + fi +fi diff --git a/usr/libexec/greenboot/greenboot-grub2-set-counter b/usr/libexec/greenboot/greenboot-grub2-set-counter index e466795..0f4c285 100755 --- a/usr/libexec/greenboot/greenboot-grub2-set-counter +++ b/usr/libexec/greenboot/greenboot-grub2-set-counter @@ -3,7 +3,7 @@ set -eo pipefail GREENBOOT_CONFIGURATION_FILE=/etc/greenboot/greenboot.conf if test -f "$GREENBOOT_CONFIGURATION_FILE"; 
then - source $GREENBOOT_CONFIGURATION_FILE + source $GREENBOOT_CONFIGURATION_FILE fi if [ -n "$1" ]; then