Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFE-2962: configure ovs should use node-ip-hint set by nodeip-configuration #3362

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 75 additions & 5 deletions templates/common/_base/files/configure-ovs-network.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ contents:

# precautionary sleep of 10s (default timeout of NM to bring down devices)
sleep 10

# After reload, devices that were already connected should connect again
# if any profile is available. If no profile is available, a device can
# remain disconnected and we have to explicitly connect it so that a
Expand Down Expand Up @@ -490,6 +490,63 @@ contents:
echo ""
}

# Accepts parameters $ip_hint_file
# Prints the content of $ip_hint_file (the node ip hint).
# Prints nothing and returns 0 when the file does not exist, so that callers
# can treat an empty result as "no hint available".
get_ip_from_ip_hint_file() {
  local ip_hint_file="$1"
  # Keep the value local so a direct (non-subshell) call cannot overwrite a
  # caller's variable of the same name.
  local ip_hint=""
  if [[ ! -f "${ip_hint_file}" ]]; then
    return
  fi
  # Command substitution drops trailing newlines from the file content.
  ip_hint=$(cat "${ip_hint_file}")
  echo "${ip_hint}"
}

# Waits for the global ipv6 address of br-ex to become bindable. Does nothing
# (besides logging) when br-ex has no global ipv6 address.
# This is a workaround for OCPBUGS-673: the script does not continue, and so
# crio is not started, before the address is bindable.
# Makes up to 60 attempts with a 1 second pause after each failed attempt.
# Exits the script with status 1 when the address never became bindable.
try_to_bind_ipv6_address() {
  local retries=60
  local ip=""
  local random_port=""
  local exit_code=0

  until [[ ${retries} -eq 0 ]]; do
    ip=$(ip -6 -j addr | jq -r "first(.[] | select(.ifname==\"br-ex\") | .addr_info[] | select(.scope==\"global\") | .local)")
    if [[ -z "${ip}" ]]; then
      echo "No ipv6 ip to bind was found"
      break
    fi
    random_port=$(shuf -i 50000-60000 -n 1)
    echo "Trying to bind ${ip} on port ${random_port}"
    # Capture only the exit status. Anything nc prints on stdout is discarded
    # so it can never be mixed into the value that is compared below.
    # "|| exit_code=$?" also keeps this safe if the caller runs with errexit.
    exit_code=0
    timeout 2s nc -l "${ip}" "${random_port}" >/dev/null || exit_code=$?
    # timeout reports 124 when it had to stop the command, i.e. nc was still
    # listening after 2 seconds, which means the bind succeeded.
    if [[ ${exit_code} -eq 124 ]]; then
      echo "Address bound successfully"
      break
    fi
    sleep 1
    retries=$((retries - 1))
  done
  if [[ ${retries} -eq 0 ]]; then
    echo "Failed to bind ip"
    exit 1
  fi
}

# Accepts parameters $ip_hint_file, $extra_bridge
# Prints the name of the first interface that holds the ip stored in the node
# ip hint file. br-ex1 and the interface named by $extra_bridge are never
# selected.
# Prints nothing when the hint file does not exist or is empty, or when no
# interface matches, so that the caller can fall back to the default
# interface search flow.
get_nodeip_hint_interface() {
  local ip_hint=""
  local ip_hint_file="$1"
  local extra_bridge="$2"
  local iface=""

  ip_hint=$(get_ip_from_ip_hint_file "${ip_hint_file}")
  if [[ -z "${ip_hint}" ]]; then
    return
  fi

  # Hand the values to jq as string variables instead of splicing them into
  # the program text, so quotes or backslashes in them cannot break the filter.
  iface=$(ip -j addr | jq -r --arg ip_hint "${ip_hint}" --arg extra_bridge "${extra_bridge}" \
    'first(.[] | select(any(.addr_info[]; .local==$ip_hint) and .ifname!="br-ex1" and .ifname!=$extra_bridge)) | .ifname')
  if [[ -n "${iface}" ]]; then
    echo "${iface}"
  fi
}

# Accepts parameters $bridge_interface (e.g. ovs-port-phys0)
# Returns the physical interface name if $bridge_interface exists, "" otherwise
get_bridge_physical_interface() {
Expand All @@ -499,25 +556,36 @@ contents:
echo "${physical_interface}"
}

# Accepts parameters $iface, $iface_default_hint_file
# Finds the default interface. If the default interface is br-ex, use that and return.
# Accepts parameters $iface, $iface_default_hint_file, $ip_hint_file
# Finds the nodeip interface from the interface that matches the ip address in $ip_hint_file.
# Otherwise falls back to a previously used interface or to the default interface.
# Never use the interface that is provided inside extra_bridge_file for br-ex1.
# Never use br-ex1.
# Read $ip_hint_file and return the interface that matches this ip. Otherwise:
# If the default interface is br-ex, use that and return.
# If the default interface is not br-ex:
# Check if there is a valid hint inside iface_default_hint_file. If so, use that hint.
# If there is no valid hint, use the default interface that we found during the step
# earlier. Write the default interface to the hint file.
get_default_interface() {
get_nodeip_interface() {
local iface=""
local counter=0
local iface_default_hint_file="$1"
local extra_bridge_file="$2"
local ip_hint_file="$3"
local extra_bridge=""

if [ -f "${extra_bridge_file}" ]; then
extra_bridge=$(cat ${extra_bridge_file})
fi

# if node ip was set, we should search for interface that matches it
iface=$(get_nodeip_hint_interface "${ip_hint_file}" "${extra_bridge}")
if [[ -n "${iface}" ]]; then
echo "${iface}"
return
fi

# find default interface
# the default interface might be br-ex, so check this before looking at the hint
while [ ${counter} -lt 12 ]; do
Expand Down Expand Up @@ -654,6 +722,7 @@ contents:
ovnk_var_dir='/var/lib/ovnk'
extra_bridge_file="${ovnk_config_dir}/extra_bridge"
iface_default_hint_file="${ovnk_var_dir}/iface_default_hint"
ip_hint_file="/run/nodeip-configuration/primary-ip"

# make sure to create ovnk_config_dir if it does not exist, yet
mkdir -p "${ovnk_config_dir}"
Expand Down Expand Up @@ -694,7 +763,7 @@ contents:
fi
touch /run/configure-ovs-boot-done

iface=$(get_default_interface "${iface_default_hint_file}" "$extra_bridge_file")
iface=$(get_nodeip_interface "${iface_default_hint_file}" "${extra_bridge_file}" "${ip_hint_file}")

if [ "$iface" != "br-ex" ]; then
# Default gateway is not br-ex.
Expand Down Expand Up @@ -739,6 +808,7 @@ contents:
connections+=(ovs-if-phys1 ovs-if-br-ex1)
fi
activate_nm_connections "${connections[@]}"
try_to_bind_ipv6_address
elif [ "$1" == "OpenShiftSDN" ]; then
# Revert changes made by /usr/local/bin/configure-ovs.sh during SDN migration.
rollback_nm
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ enabled: {{if eq .Infra.Status.PlatformStatus.Type "None"}}true{{else}}false{{en
contents: |
[Unit]
Description=Writes IP address configuration so that kubelet and crio services select a valid node IP
Wants=network-online.target crio-wipe.service
After=network-online.target ignition-firstboot-complete.service crio-wipe.service
Before=kubelet.service crio.service
Wants=NetworkManager-wait-online.service crio-wipe.service
After=NetworkManager-wait-online.service ignition-firstboot-complete.service crio-wipe.service
Before=kubelet.service crio.service ovs-configuration.service

[Service]
# Need oneshot to delay kubelet
Expand All @@ -20,6 +20,7 @@ contents: |
--net=host \
--security-opt label=disable \
--volume /etc/systemd/system:/etc/systemd/system \
--volume /run/nodeip-configuration:/run/nodeip-configuration \
{{ .Images.baremetalRuntimeCfgImage }} \
node-ip \
set \
Expand All @@ -32,6 +33,7 @@ contents: |
sleep 5; \
done"
ExecStart=/bin/systemctl daemon-reload
ExecStartPre=/bin/mkdir -p /run/nodeip-configuration

{{if .Proxy -}}
EnvironmentFile=/etc/mco/proxy.env
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ contents: |
# This service is used to move a physical NIC into OVS and reconfigure OVS to use the host IP
Requires=openvswitch.service
Wants=NetworkManager-wait-online.service
After=NetworkManager-wait-online.service openvswitch.service network.service
After=NetworkManager-wait-online.service openvswitch.service network.service nodeip-configuration.service
Before=network-online.target kubelet.service crio.service node-valid-hostname.service

[Service]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ contents: |
# This only applies to VIP managing environments where the kubelet and crio IP
# address picking logic is flawed and may end up selecting an address from a
# different subnet or a deprecated address
Wants=network-online.target crio-wipe.service
After=network-online.target ignition-firstboot-complete.service crio-wipe.service
Before=kubelet.service crio.service
Wants=NetworkManager-wait-online.service crio-wipe.service
After=NetworkManager-wait-online.service ignition-firstboot-complete.service crio-wipe.service
Before=kubelet.service crio.service ovs-configuration.service

[Service]
# Need oneshot to delay kubelet
Expand All @@ -23,6 +23,7 @@ contents: |
--net=host \
--security-opt label=disable \
--volume /etc/systemd/system:/etc/systemd/system \
--volume /run/nodeip-configuration:/run/nodeip-configuration \
{{ .Images.baremetalRuntimeCfgImage }} \
node-ip \
set --retry-on-failure \
Expand All @@ -31,7 +32,7 @@ contents: |
sleep 5; \
done"
ExecStart=/bin/systemctl daemon-reload

ExecStartPre=/bin/mkdir -p /run/nodeip-configuration
{{if .Proxy -}}
EnvironmentFile=/etc/mco/proxy.env
{{end -}}
Expand Down