From c512f03a5da5b7ed47771f1dcf49013eac3374f4 Mon Sep 17 00:00:00 2001 From: Andreas Peters Date: Wed, 18 Oct 2023 08:19:44 +0200 Subject: [PATCH] ADD: agent count check. --- changelog.md | 2 +- docs/example/mesos-compose.yml | 1 + docs/example/test-http.yaml | 6 ------ mesos/mesos.go | 8 ++++++-- scheduler/handle_offers.go | 5 ++++- scheduler/heartbeat.go | 14 ++++++++------ 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/changelog.md b/changelog.md index 06a4f3f..caa4753 100644 --- a/changelog.md +++ b/changelog.md @@ -32,7 +32,7 @@ - FIX: Conflict between reconcile and heatbeat could end in a task restart loop - ADD: Parameter to configure the Mesos Task DiscoveryInfoName Delimiter `DISCOVERY_INFONAME_DELIMITER`. Default value is ".". - ADD: Parameter to configure the Mesos Task DiscoveryPortName Delimiter `DISCOVERY_PORTNAME_DELIMITER`. Default value is "_". -- ADD: Constraint `unique` to run a only one instance of a task per node. +- ADD: Constraint `unique` to run only one instance of a task per node. 
## 0.4.2 diff --git a/docs/example/mesos-compose.yml b/docs/example/mesos-compose.yml index 686ea6a..ae320d6 100644 --- a/docs/example/mesos-compose.yml +++ b/docs/example/mesos-compose.yml @@ -54,6 +54,7 @@ services: - "node.hostname==localhost" - "node.platform.os==linux" - "node.platform.arch==arm" + - "unique" replicas: 1 resources: limits: diff --git a/docs/example/test-http.yaml b/docs/example/test-http.yaml index 2b71879..bcfab0d 100644 --- a/docs/example/test-http.yaml +++ b/docs/example/test-http.yaml @@ -18,9 +18,3 @@ services: restart: always deploy: replicas: 1 - -networks: - default: - external: true - name: weave - diff --git a/mesos/mesos.go b/mesos/mesos.go index ed63ad8..dc841b2 100644 --- a/mesos/mesos.go +++ b/mesos/mesos.go @@ -22,6 +22,7 @@ type Mesos struct { Framework *cfg.FrameworkConfig IsSuppress bool IsRevive bool + CountAgent int } // Marshaler to serialize Protobuf Message to JSON @@ -167,7 +168,7 @@ func (e *Mesos) Call(message *mesosproto.Call) error { body, err := ioutil.ReadAll(res.Body) if err != nil { logrus.WithField("func", "mesos.Call").Error("Call Handling (could not read res.Body)") - return fmt.Errorf("Error %d", res.StatusCode) + return fmt.Errorf("error %d", res.StatusCode) } logrus.WithField("func", "mesos.Call").Error("Call Handling: ", string(body)) @@ -276,7 +277,7 @@ func (e *Mesos) GetAgentInfo(agentID string) cfg.MesosSlaves { res, err := client.Do(req) if err != nil { - logrus.WithField("func", "getAgentInfo").Error("Could not connect to agent: ", err.Error()) + logrus.WithField("func", "mesos.getAgentInfo").Error("Could not connect to master: ", err.Error()) return cfg.MesosSlaves{} } @@ -295,6 +296,9 @@ func (e *Mesos) GetAgentInfo(agentID string) cfg.MesosSlaves { return cfg.MesosSlaves{} } + // save how many agents the cluster has + e.CountAgent = len(agent.Slaves) + // get the used agent info for _, a := range agent.Slaves { if a.ID == agentID { diff --git a/scheduler/handle_offers.go 
b/scheduler/handle_offers.go index 502b4de..2dce5c7 100644 --- a/scheduler/handle_offers.go +++ b/scheduler/handle_offers.go @@ -118,15 +118,18 @@ func (e *Scheduler) getOffer(offers *mesosproto.Event_Offers, cmd cfg.Command) ( if e.getLabelValue("__mc_placement", cmd) == "unique" { if e.alreadyRunningOnHostname(cmd, offer) { + logrus.WithField("func", "scheduler.getOffer").Debug("UNIQUE: Already running on node: ", offer.GetHostname()) continue } } if !e.isAttributeMachted("__mc_placement_node_platform_os", "os", cmd, offer) { + logrus.WithField("func", "scheduler.getOffer").Debug("OS: Does not match Attribute") continue } if !e.isAttributeMachted("__mc_placement_node_platform_arch", "arch", cmd, offer) { + logrus.WithField("func", "scheduler.getOffer").Debug("ARCH: Does not match Attribute") continue } @@ -164,7 +167,7 @@ func (e *Scheduler) alreadyRunningOnHostname(cmd cfg.Command, offer mesosproto.O continue } - if task.MesosAgent.Hostname == cmd.MesosAgent.Hostname { + if task.MesosAgent.Hostname == cmd.MesosAgent.Hostname && task.TaskID != cmd.TaskID { return true } } diff --git a/scheduler/heartbeat.go b/scheduler/heartbeat.go index d1ec727..ca5de2f 100644 --- a/scheduler/heartbeat.go +++ b/scheduler/heartbeat.go @@ -46,12 +46,14 @@ func (e *Scheduler) Heartbeat() { // there are lesser instances are running as it should be if e.Redis.CountRedisKey(task.TaskName+":*", "__KILL") < task.Instances { - logrus.WithField("func", "scheduler.CheckState").Info("Scale up Mesos Task: ", task.TaskName) - e.Mesos.Revive() - task.State = "" - task.TaskID = e.API.IncreaseTaskCount(task.TaskID) - e.Redis.SaveTaskRedis(task) - continue + if e.getLabelValue("__mc_placement", task) != "unique" && e.Mesos.CountAgent >= task.Instances { + logrus.WithField("func", "scheduler.CheckState").Info("Scale up Mesos Task: ", task.TaskName) + e.Mesos.Revive() + task.State = "" + task.TaskID = e.API.IncreaseTaskCount(task.TaskID) + e.Redis.SaveTaskRedis(task) + continue + } } // there are 
more instances are running as it should be