diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample
index 205397288f..ca6e480042 100644
--- a/group_vars/all.yml.sample
+++ b/group_vars/all.yml.sample
@@ -420,6 +420,7 @@ dummy:
 # RGW handler checks
 #handler_health_rgw_check_retries: 5
 #handler_health_rgw_check_delay: 10
+#handler_rgw_use_haproxy_maintenance: false
 
 # NFS handler checks
 #handler_health_nfs_check_retries: 5
diff --git a/roles/ceph-defaults/defaults/main.yml b/roles/ceph-defaults/defaults/main.yml
index d8b06cdd19..a61280100c 100644
--- a/roles/ceph-defaults/defaults/main.yml
+++ b/roles/ceph-defaults/defaults/main.yml
@@ -412,6 +412,7 @@ handler_health_mds_check_delay: 10
 # RGW handler checks
 handler_health_rgw_check_retries: 5
 handler_health_rgw_check_delay: 10
+handler_rgw_use_haproxy_maintenance: false
 
 # NFS handler checks
 handler_health_nfs_check_retries: 5
diff --git a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
index 5ea0f3c7db..d7eb36a723 100644
--- a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
@@ -11,6 +11,7 @@ else
   RGW_PROTOCOL=http
 fi
 INSTANCES_NAME=({% for i in rgw_instances %}{{ i.instance_name }} {% endfor %})
+HAPROXY_BACKEND=({% for i in rgw_instances %}{{ i.haproxy_backend | default('rgw-backend') }} {% endfor %})
 RGW_IPS=({% for i in rgw_instances %}{{ i.radosgw_address }} {% endfor %})
 RGW_PORTS=({% for i in rgw_instances %}{{ i.radosgw_frontend_port }} {% endfor %})
 RGW_ZONE="{{ rgw_zone }}"
@@ -78,19 +79,46 @@ check_rest() {
 }
 
 for ((i=0; i<${RGW_NUMS}; i++)); do
-  # First, restart the daemon
-
   # Check if systemd unit exists
   # This is needed for new instances as the restart might trigger before the deployment
-  if systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
-    systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
-  else
+  if ! systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
     echo "Systemd unit ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]} does not exist."
     continue
   fi
 
+{% if handler_rgw_use_haproxy_maintenance %}
+  # Drain the server: set its weight to 0 on haproxy
+  haproxy_server="${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]}"
+  echo "set weight ${haproxy_server} 0" | socat stdio {{ haproxy_socket_path }}
+
+  # Wait for the connections to drop, polling once per second
+  retries={{ handler_rgw_haproxy_maintenance_retries | default(60) }}
+  while [ "${retries}" -gt 0 ]; do
+    # 7th field of "show servers conn" (used_cur); the server name is matched exactly
+    used_cur="$(echo "show servers conn ${HAPROXY_BACKEND[i]}" | socat stdio {{ haproxy_socket_path }} | awk -v srv="${haproxy_server}" '$1 == srv { print $7 }')"
+    # No answer for this server: polling again cannot succeed, so stop waiting
+    if [ -z "${used_cur}" ]; then
+      echo "haproxy returned no connection data for ${haproxy_server}, not waiting." >&2
+      break
+    fi
+    if [ "${used_cur}" -eq 0 ]; then
+      break
+    fi
+    sleep 1
+    retries=$((retries - 1))
+  done
+{% endif %}
+
+  # Restart the daemon
+  systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
+
   # Check socket files
   check_socket ${i}
   # Check rest
   check_rest ${i}
+
+{% if handler_rgw_use_haproxy_maintenance %}
+  # Restore the weight configured in haproxy ("100%" is relative to the initial weight)
+  echo "set weight ${haproxy_server} 100%" | socat stdio {{ haproxy_socket_path }}
+{% endif %}
 done