Skip to content

Commit

Permalink
Added an interface for endpoint calculation of different NEG types.
Browse files Browse the repository at this point in the history
  • Loading branch information
prameshj committed Jan 9, 2020
1 parent 9d6f337 commit 4bd379b
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 123 deletions.
178 changes: 178 additions & 0 deletions pkg/neg/syncers/endpoints_calculator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package syncers

import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
listers "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/ingress-gce/pkg/neg/types"
"k8s.io/ingress-gce/pkg/utils"
"k8s.io/klog"
)

// LocalL4ILBEndpointsCalculator implements methods to calculate Network endpoints for VM_PRIMARY_IP NEGs when the service
// uses "ExternalTrafficPolicy: Local" mode.
type LocalL4ILBEndpointsCalculator struct {
// nodeLister is used to fetch the node object for the node name recorded on each endpoint address.
nodeLister listers.NodeLister
// zoneGetter resolves the zone that a node belongs to.
zoneGetter types.ZoneGetter
// subsetSizeLimit is the upper bound consulted by getPerZoneSubsetCount; the constructor sets it to maxSubsetSizeLocal.
subsetSizeLimit int
// svcId is handed to getSubsetPerZone when the per-zone subsets are picked.
svcId string
}

// NewLocalL4ILBEndpointsCalculator returns a LocalL4ILBEndpointsCalculator that uses the given
// node lister, zone getter and service id. Its subset size limit is set to maxSubsetSizeLocal.
func NewLocalL4ILBEndpointsCalculator(nodeLister listers.NodeLister, zoneGetter types.ZoneGetter, svcId string) *LocalL4ILBEndpointsCalculator {
	calculator := &LocalL4ILBEndpointsCalculator{
		subsetSizeLimit: maxSubsetSizeLocal,
		svcId:           svcId,
		nodeLister:      nodeLister,
		zoneGetter:      zoneGetter,
	}
	return calculator
}

// CalculateEndpoints computes the desired per-zone node subsets for the NEGs from the service
// endpoints in ep and the NEG membership in currentMap.
//
// Only nodes hosting at least one endpoint address that carries both a node name and a pod
// reference are candidates. Nodes whose object or zone cannot be resolved are logged and skipped.
// When no usable endpoint address exists, all three return values are nil. The returned
// EndpointPodMap is always nil.
func (l *LocalL4ILBEndpointsCalculator) CalculateEndpoints(ep *v1.Endpoints, currentMap map[string]types.NetworkEndpointSet) (map[string]types.NetworkEndpointSet, types.EndpointPodMap, error) {
	var (
		endpointCount int
		seenNodes     = sets.String{}
		// nodesByZone groups the nodes that run service endpoints by the zone they live in.
		nodesByZone = make(map[string][]*v1.Node)
	)
	for i := range ep.Subsets {
		addresses := ep.Subsets[i].Addresses
		for j := range addresses {
			addr := &addresses[j]
			switch {
			case addr.NodeName == nil:
				klog.V(2).Infof("Endpoint %q in Endpoints %s/%s does not have an associated node. Skipping", addr.IP, ep.Namespace, ep.Name)
				continue
			case addr.TargetRef == nil:
				klog.V(2).Infof("Endpoint %q in Endpoints %s/%s does not have an associated pod. Skipping", addr.IP, ep.Namespace, ep.Name)
				continue
			}
			// Every usable address counts as an endpoint, even when its node was already seen.
			endpointCount++
			nodeName := *addr.NodeName
			if seenNodes.Has(nodeName) {
				continue
			}
			// Record the name before the lookups so a failing node is only attempted once.
			seenNodes.Insert(nodeName)
			node, err := l.nodeLister.Get(nodeName)
			if err != nil {
				klog.Errorf("failed to retrieve node object for %q: %v", nodeName, err)
				continue
			}
			zone, err := l.zoneGetter.GetZoneForNode(node.Name)
			if err != nil {
				klog.Errorf("Unable to find zone for node %s, err %v, skipping", node.Name, err)
				continue
			}
			nodesByZone[zone] = append(nodesByZone[zone], node)
		}
	}
	if endpointCount == 0 {
		// TODO verify the behavior seen by a client when accessing an ILB whose NEGs have no endpoints.
		return nil, nil, nil
	}
	// The zone count covers only zones where endpoint pods are running.
	perZoneCount := l.getPerZoneSubsetCount(len(nodesByZone), endpointCount)
	// Pick at most perZoneCount nodes in every zone.
	subsets, err := getSubsetPerZone(nodesByZone, perZoneCount, l.svcId, currentMap)
	return subsets, nil, err
}

// getPerZoneSubsetCount returns the maximum size of each zonal NEG for the given number of zones
// and service endpoints. While the endpoint count stays within subsetSizeLimit the result is
// proportional to it; beyond the limit, the limit itself is split across the zones.
func (l *LocalL4ILBEndpointsCalculator) getPerZoneSubsetCount(numZones, numEndpoints int) int {
	switch {
	case numZones == 0:
		return 0
	case numEndpoints > l.subsetSizeLimit:
		// Integer division may leave the total short of the limit,
		// e.g. 250/3 = 83 and 83*3 = 249, one less than 250.
		return l.subsetSizeLimit / numZones
	case numEndpoints > 0 && numEndpoints < numZones:
		// Fewer endpoints than zones (say 2 endpoints, 3 zones): still pick one node per zone.
		return 1
	default:
		return numEndpoints / numZones
	}
}

// ClusterL4ILBEndpointsCalculator implements methods to calculate Network endpoints for VM_PRIMARY_IP NEGs when the service
// uses "ExternalTrafficPolicy: Cluster" mode. This is the default mode.
type ClusterL4ILBEndpointsCalculator struct {
// nodeLister is used to list the candidate cluster nodes.
nodeLister listers.NodeLister
// zoneGetter resolves the zone that a node belongs to.
zoneGetter types.ZoneGetter
// subsetSizeLimit is the static limit that getPerZoneSubsetCount divides across zones; the constructor sets it to maxSubsetSizeDefault.
subsetSizeLimit int
// svcId is handed to getSubsetPerZone when the per-zone subsets are picked.
svcId string
}

// NewClusterL4ILBEndpointsCalculator returns a ClusterL4ILBEndpointsCalculator that uses the given
// node lister, zone getter and service id. Its subset size limit is set to maxSubsetSizeDefault.
func NewClusterL4ILBEndpointsCalculator(nodeLister listers.NodeLister, zoneGetter types.ZoneGetter, svcId string) *ClusterL4ILBEndpointsCalculator {
	calculator := &ClusterL4ILBEndpointsCalculator{
		subsetSizeLimit: maxSubsetSizeDefault,
		svcId:           svcId,
		nodeLister:      nodeLister,
		zoneGetter:      zoneGetter,
	}
	return calculator
}

// CalculateEndpoints determines the endpoints in the NEGs based on the current service endpoints and the current NEGs.
//
// In this mode any cluster node accepted by utils.GetNodeConditionPredicate can be part of the
// subset, whether or not a matching pod runs on it, so ep is not inspected. Nodes whose zone
// cannot be resolved are logged and skipped. The returned EndpointPodMap is always nil.
//
// An error from listing the nodes is returned to the caller rather than being treated as an
// empty node list, so that a lister failure is not mistaken for "no nodes desired".
func (l *ClusterL4ILBEndpointsCalculator) CalculateEndpoints(ep *v1.Endpoints, currentMap map[string]types.NetworkEndpointSet) (map[string]types.NetworkEndpointSet, types.EndpointPodMap, error) {
	nodes, err := l.nodeLister.ListWithPredicate(utils.GetNodeConditionPredicate())
	if err != nil {
		return nil, nil, err
	}

	nodeZoneMap := make(map[string][]*v1.Node)
	for _, node := range nodes {
		zone, err := l.zoneGetter.GetZoneForNode(node.Name)
		if err != nil {
			klog.Errorf("Unable to find zone for node %s, err %v, skipping", node.Name, err)
			continue
		}
		nodeZoneMap[zone] = append(nodeZoneMap[zone], node)
	}
	numZones := len(nodeZoneMap)
	// This value is always SubsetSizeLimit/numZones in this mode. Passing in numEndpoints as 0 to avoid
	// unnecessary calculation.
	// If the number of endpoints matters in the calculation, this can be changed to:
	// perZoneCount := l.getPerZoneSubsetCount(numZones, utils.NumEndpoints(ep))
	perZoneCount := l.getPerZoneSubsetCount(numZones, 0)
	// Compute the networkEndpoints, with the endpointSet in each zone being at most `perZoneCount` in size.
	subsetMap, err := getSubsetPerZone(nodeZoneMap, perZoneCount, l.svcId, currentMap)
	return subsetMap, nil, err
}

// getPerZoneSubsetCount returns the maximum size of each zonal NEG for the given number of zones.
// numEndpoints is accepted but ignored: the static subsetSizeLimit is split across the zones
// instead of scaling with the service size. This helps minimize changes to the NEGs, and is
// acceptable because NEG endpoints are picked at random in this mode, irrespective of the
// service endpoints.
func (l *ClusterL4ILBEndpointsCalculator) getPerZoneSubsetCount(numZones, numEndpoints int) int {
	if numZones != 0 {
		return l.subsetSizeLimit / numZones
	}
	// No zones means nothing to pick from.
	return 0
}

// L7EndpointsCalculator implements methods to calculate Network endpoints for VM_IP_PORT NEGs.
type L7EndpointsCalculator struct {
// zoneGetter is forwarded to toZoneNetworkEndpointMap.
zoneGetter types.ZoneGetter
// servicePortName is the service port name forwarded to toZoneNetworkEndpointMap.
servicePortName string
// podLister is the pod indexer forwarded to toZoneNetworkEndpointMap.
podLister cache.Indexer
// subsetLabels is forwarded to toZoneNetworkEndpointMap.
subsetLabels string
// networkEndpointType is the NEG endpoint type supplied to the constructor.
networkEndpointType types.NetworkEndpointType
}

// NewL7EndpointsCalculator returns an L7EndpointsCalculator holding the given zone getter, pod
// indexer, service port name, subset labels and network endpoint type.
func NewL7EndpointsCalculator(zoneGetter types.ZoneGetter, podLister cache.Indexer, svcPortName, subsetLabels string, endpointType types.NetworkEndpointType) *L7EndpointsCalculator {
	calculator := new(L7EndpointsCalculator)
	calculator.zoneGetter = zoneGetter
	calculator.podLister = podLister
	calculator.servicePortName = svcPortName
	calculator.subsetLabels = subsetLabels
	calculator.networkEndpointType = endpointType
	return calculator
}

// CalculateEndpoints determines the endpoints in the NEGs based on the current service endpoints and the current NEGs.
//
// It delegates to toZoneNetworkEndpointMap and returns its results unchanged. The calculator's
// configured networkEndpointType is forwarded (rather than an empty type) so that the NEG type
// chosen at construction time is honored. currentMap is not consulted by this implementation.
func (l *L7EndpointsCalculator) CalculateEndpoints(ep *v1.Endpoints, currentMap map[string]types.NetworkEndpointSet) (map[string]types.NetworkEndpointSet, types.EndpointPodMap, error) {
	return toZoneNetworkEndpointMap(ep, l.zoneGetter, l.servicePortName, l.podLister, l.subsetLabels, l.networkEndpointType)
}
65 changes: 4 additions & 61 deletions pkg/neg/syncers/subsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@ package syncers
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"sort"

"k8s.io/klog"

"k8s.io/api/core/v1"
negtypes "k8s.io/ingress-gce/pkg/neg/types"
"k8s.io/ingress-gce/pkg/utils"
Expand Down Expand Up @@ -118,37 +115,13 @@ func pickSubsetsMinRemovals(nodes []*v1.Node, salt string, count int, current []
return subset
}

// getSubsetPerZone groups the given list of nodes by zone, ensuring that there is a
// non-zero subset from each zone.
// getSubsetPerZone creates a subset of nodes from the given list of nodes, for each zone provided.
// The output is a map of zone string to NEG subset.
func getSubsetPerZone(nodes []*v1.Node, zoneGetter negtypes.ZoneGetter, svcID string, currentMap map[string]negtypes.NetworkEndpointSet, newEpCount int, randomize bool) (map[string]negtypes.NetworkEndpointSet, error) {
func getSubsetPerZone(nodesPerZone map[string][]*v1.Node, perZoneCount int, svcID string, currentMap map[string]negtypes.NetworkEndpointSet) (map[string]negtypes.NetworkEndpointSet, error) {
result := make(map[string]negtypes.NetworkEndpointSet)
zoneMap := make(map[string][]*v1.Node)
for _, node := range nodes {
zone, err := zoneGetter.GetZoneForNode(node.Name)
if err != nil {
klog.Errorf("Unable to find zone for node %s, err %v, skipping", node.Name, err)
continue
}
zoneMap[zone] = append(zoneMap[zone], node)
}
numZones := len(zoneMap)
if numZones == 0 {
return nil, fmt.Errorf("Expected nodes in atleast one zone, got nodes %+v", nodes)
}
currentEpCount := 0
for _, set := range currentMap {
currentEpCount += set.Len()
}
// subsetSize is equal to numZones if there are no endpoints, so one node from each zone is selected.
subsetSize := getSubsetCount(currentEpCount, newEpCount, numZones, randomize)
// This algorithm picks atmost 'perZoneSubset' number of nodes from each zone.
// If there are fewer nodes in one zone, more nodes are NOT picked from other zones.
// TODO(prameshj) fix this.
perZoneSubset := subsetSize / numZones
var currentList []negtypes.NetworkEndpoint

for zone, nodesInZone := range zoneMap {
for zone, nodes := range nodesPerZone {
result[zone] = negtypes.NewNetworkEndpointSet()
if currentMap != nil {
if zset, ok := currentMap[zone]; ok && zset != nil {
Expand All @@ -157,40 +130,10 @@ func getSubsetPerZone(nodes []*v1.Node, zoneGetter negtypes.ZoneGetter, svcID st
currentList = nil
}
}
subset := pickSubsetsMinRemovals(nodesInZone, svcID, perZoneSubset, currentList)
subset := pickSubsetsMinRemovals(nodes, svcID, perZoneCount, currentList)
for _, node := range subset {
result[zone].Insert(negtypes.NetworkEndpoint{Node: node.Name, IP: utils.GetNodePrimaryIP(node)})
}
}
return result, nil
}

// getSubsetCount computes the size of the subset based on input parameters.
// If there are no endpoints, the subset count is same as the number of cluster zones.
// In the non-random mode, the subset size is equal to the endpoint count, bound by a limit.
// In the random mode, the subset is equal to the current or new endpoint count, whichever is larger. This is also
// bound by a limit.
func getSubsetCount(currentCount, newCount, numZones int, randomize bool) int {
if newCount == 0 {
// no endpoints for this service, use the zone count and pick one node per zone.
newCount = numZones
}
if randomize {
// ExternalTrafficPolicy: Cluster mode
// removals to be kept to a minimum in this mode. Use the current subset count if that is larger than the
// new count, so the subset does not shrink and drop nodes.
if newCount < currentCount {
newCount = currentCount
}
return min(newCount, maxSubsetSizeDefault)
}
// Pick the new count of endpoints, or the limit, whichever is smaller.
return min(newCount, maxSubsetSizeLocal)
}

// min returns the smaller of val and limit.
func min(val, limit int) int {
	if limit <= val {
		return limit
	}
	return val
}
43 changes: 24 additions & 19 deletions pkg/neg/syncers/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ type transactionSyncer struct {
// transactions stores each transaction
transactions networkEndpointTransactionTable

podLister cache.Indexer
serviceLister cache.Indexer
endpointLister cache.Indexer
nodeLister cache.Indexer
recorder record.EventRecorder
cloud negtypes.NetworkEndpointGroupCloud
zoneGetter negtypes.ZoneGetter

podLister cache.Indexer
serviceLister cache.Indexer
endpointLister cache.Indexer
nodeLister cache.Indexer
recorder record.EventRecorder
cloud negtypes.NetworkEndpointGroupCloud
zoneGetter negtypes.ZoneGetter
endpointsCalculator negtypes.NetworkEndpointsCalculator
// This only applies in the GCE_VM_PRIMARY_IP NEG.
// randomize indicates that the endpoints of the NEG can be picked at random, rather
// than following the endpoints of the service.
Expand Down Expand Up @@ -90,6 +90,8 @@ func NewTransactionSyncer(negSyncerKey negtypes.NegSyncerKey, networkEndpointGro
randomize: randomizeEndpoints,
reflector: reflector,
}
// determine the implementation that calculates NEG endpoints on each sync.
ts.endpointsCalculator = getEndpointsCalculator(ts)
// Syncer implements life cycle logic
syncer := newSyncer(negSyncerKey, networkEndpointGroupName, serviceLister, recorder, ts)
// transactionSyncer needs syncer interface for internals
Expand All @@ -98,6 +100,19 @@ func NewTransactionSyncer(negSyncerKey negtypes.NegSyncerKey, networkEndpointGro
return syncer
}

// getEndpointsCalculator picks the NetworkEndpointsCalculator implementation for the given syncer.
// NEGs of type VmPrimaryIpEndpointType get a Cluster calculator when syncer.randomize is set and
// a Local calculator otherwise; every other NEG type gets the L7 calculator.
func getEndpointsCalculator(syncer *transactionSyncer) negtypes.NetworkEndpointsCalculator {
	if syncer.NegSyncerKey.NegType != negtypes.VmPrimaryIpEndpointType {
		return NewL7EndpointsCalculator(
			syncer.zoneGetter,
			syncer.podLister,
			syncer.PortTuple.Name,
			syncer.NegSyncerKey.SubsetLabels,
			syncer.NegSyncerKey.NegType,
		)
	}

	// The service key is built as "<Name>/<Namespace>".
	// NOTE(review): this order is the reverse of the usual namespace/name form — confirm it is intended.
	svcKey := strings.Join([]string{syncer.Name, syncer.Namespace}, "/")
	nodes := listers.NewNodeLister(syncer.nodeLister)
	if !syncer.randomize {
		return NewLocalL4ILBEndpointsCalculator(nodes, syncer.zoneGetter, svcKey)
	}
	return NewClusterL4ILBEndpointsCalculator(nodes, syncer.zoneGetter, svcKey)
}

func (s *transactionSyncer) sync() error {
err := s.syncInternal()
if err != nil {
Expand Down Expand Up @@ -149,17 +164,7 @@ func (s *transactionSyncer) syncInternal() error {
// The combined state represents the eventual result when all transactions completed
mergeTransactionIntoZoneEndpointMap(currentMap, s.transactions)

var targetMap map[string]negtypes.NetworkEndpointSet
var endpointPodMap negtypes.EndpointPodMap

switch {
case s.NegSyncerKey.NegType == negtypes.VmPrimaryIpEndpointType:
nodeLister := listers.NewNodeLister(s.nodeLister)
serviceKey := strings.Join([]string{s.Name, s.Namespace}, "/")
targetMap, err = toZonePrimaryIPEndpointMap(ep.(*apiv1.Endpoints), nodeLister, s.zoneGetter, s.randomize, currentMap, serviceKey)
default:
targetMap, endpointPodMap, err = toZoneNetworkEndpointMap(ep.(*apiv1.Endpoints), s.zoneGetter, s.PortTuple.Name, s.podLister, s.NegSyncerKey.SubsetLabels, s.NegSyncerKey.NegType)
}
targetMap, endpointPodMap, err := s.endpointsCalculator.CalculateEndpoints(ep.(*apiv1.Endpoints), currentMap)

// Calculate the endpoints to add and delete to transform the current state to desire state
addEndpoints, removeEndpoints := calculateNetworkEndpointDifference(targetMap, currentMap)
Expand Down
Loading

0 comments on commit 4bd379b

Please sign in to comment.