Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support scheduled actions and cancellation #419

Merged
merged 7 commits into from
May 24, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ linters:
- noctx # noctx finds sending http request without context.Context
- unconvert # Remove unnecessary type conversions
- wastedassign # wastedassign finds wasted assignment statements.
- godox # tool for detection of FIXME, TODO and other comment keywords
# - godox # tool for detection of FIXME, TODO and other comment keywords

# all available settings of specific linters
linters-settings:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,4 @@
- Fix download verification in snapshot builds. {issue}252[252]
- Add support for kubernetes cronjobs {pull}279[279]
- Increase the download artifact timeout to 10mins and add log download statistics. {pull}308[308]
- Support scheduled actions and cancellation of pending actions. {issue}393[393] {pull}419[419]
32 changes: 32 additions & 0 deletions NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14042,6 +14042,38 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
Dependency : github.com/stretchr/objx
Version: v0.2.0
Licence type (autodetected): MIT
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/github.com/stretchr/objx@v0.2.0/LICENSE:

The MIT License

Copyright (c) 2014 Stretchr, Inc.
Copyright (c) 2017-2018 objx contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


--------------------------------------------------------------------------------
Dependency : github.com/tklauser/go-sysconf
Version: v0.3.9
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ require (
github.com/sergi/go-diff v1.1.0 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.2.0 // indirect
github.com/tklauser/go-sysconf v0.3.9 // indirect
github.com/tklauser/numcpus v0.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1117,6 +1117,7 @@ github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag
github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
Expand Down
97 changes: 93 additions & 4 deletions internal/pkg/agent/application/gateway/fleet/fleet_gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package fleet

import (
"context"
stderr "errors"
"fmt"
"sync"
"time"
Expand Down Expand Up @@ -62,6 +63,14 @@ type stateStore interface {
AckToken() string
SetAckToken(ackToken string)
Save() error
SetQueue([]fleetapi.Action)
Actions() []fleetapi.Action
}

// actionQueue is the set of queue operations the fleet gateway relies on for
// holding scheduled actions between checkins.
type actionQueue interface {
// Add stores an action together with an int64 ordering value; the gateway
// passes the Unix-seconds value of the action's start time.
Add(fleetapi.Action, int64)
// DequeueActions returns the actions the gateway should consider for dispatch
// on the current tick.
// NOTE(review): presumably this also removes them from the queue — confirm against the implementation.
DequeueActions() []fleetapi.Action
// Cancel is not called directly by the gateway code shown here.
// NOTE(review): presumably removes queued actions matching the given ID and returns how many were removed — confirm.
Cancel(string) int
// Actions returns the queue contents; the gateway hands the result to
// stateStore.SetQueue before saving state.
Actions() []fleetapi.Action
}

Expand All @@ -82,6 +91,7 @@ type fleetGateway struct {
statusController status.Controller
statusReporter status.Reporter
stateStore stateStore
queue actionQueue
}

// New creates a new fleet gateway
Expand All @@ -95,6 +105,7 @@ func New(
acker store.FleetAcker,
statusController status.Controller,
stateStore stateStore,
queue actionQueue,
) (gateway.FleetGateway, error) {

scheduler := scheduler.NewPeriodicJitter(defaultGatewaySettings.Duration, defaultGatewaySettings.Jitter)
Expand All @@ -110,6 +121,7 @@ func New(
acker,
statusController,
stateStore,
queue,
)
}

Expand All @@ -125,6 +137,7 @@ func newFleetGatewayWithScheduler(
acker store.FleetAcker,
statusController status.Controller,
stateStore stateStore,
queue actionQueue,
) (gateway.FleetGateway, error) {

// Backoff implementation doesn't support the use of a context [cancellation]
Expand All @@ -151,13 +164,14 @@ func newFleetGatewayWithScheduler(
statusReporter: statusController.RegisterComponent("gateway"),
statusController: statusController,
stateStore: stateStore,
queue: queue,
}, nil
}

func (f *fleetGateway) worker() {
for {
select {
case <-f.scheduler.WaitTick():
case ts := <-f.scheduler.WaitTick():
f.log.Debug("FleetGateway calling Checkin API")

// Execute the checkin call and for any errors returned by the fleet-server API
Expand All @@ -168,12 +182,30 @@ func (f *fleetGateway) worker() {
continue
}

actions := make([]fleetapi.Action, len(resp.Actions))
for idx, a := range resp.Actions {
actions[idx] = a
actions := f.queueScheduledActions(resp.Actions)
actions, err = f.dispatchCancelActions(actions)
if err != nil {
f.log.Error(err.Error())
// TODO set errMsg? update status?
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
}

queued, expired := f.gatherQueuedActions(ts.UTC())
f.log.Debugf("Gathered %d actions from queue, %d actions expired", len(queued), len(expired))

// TODO update all actions in expired as aborted?
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved

actions = append(actions, queued...)

var errMsg string
// Persist state
f.stateStore.SetQueue(f.queue.Actions())
if err := f.stateStore.Save(); err != nil {
errMsg = fmt.Sprintf("failed to persist action_queue, error: %s", err)
f.log.Error(errMsg)
f.statusReporter.Update(state.Failed, errMsg, nil)
// TODO should we handle this failure differently?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to do anything differently if we are unable to save state here?

}

if err := f.dispatcher.Dispatch(context.Background(), f.acker, actions...); err != nil {
errMsg = fmt.Sprintf("failed to dispatch actions, error: %s", err)
f.log.Error(errMsg)
Expand All @@ -194,6 +226,63 @@ func (f *fleetGateway) worker() {
}
}

// queueScheduledActions partitions input by whether an action reports a start time.
// Actions whose StartTime call succeeds are added to the queue, keyed by the start
// time in Unix seconds; every other action is returned to the caller unchanged and
// in its original order.
// The start time is purposefully not compared against the current time here, in
// case of cancel actions.
func (f *fleetGateway) queueScheduledActions(input fleetapi.Actions) []fleetapi.Action {
	remaining := make([]fleetapi.Action, 0, len(input))
	for _, candidate := range input {
		startAt, err := candidate.StartTime()
		switch {
		case err == nil:
			// TODO persist queue here?
			// f.stateStore.SetQueue(f.queue.Actions())
			// f.stateStore.Save()
			f.log.Debugf("Adding action id: %s to queue.", candidate.ID())
			f.queue.Add(candidate, startAt.Unix())
			continue
		case !stderr.Is(err, fleetapi.ErrNoStartTime):
			// Anything other than "no start time" is unexpected; warn, but still
			// hand the action back so it is not dropped.
			f.log.Warnf("Issue gathering start time from action id %s: %v", candidate.ID(), err)
		}
		remaining = append(remaining, candidate)
	}
	return remaining
}

// dispatchCancelActions separates the cancel actions out of actions, dispatches them
// immediately, and returns the remaining (non-cancel) actions.
// Cancel actions are dispatched separately, ahead of everything else, because they
// may remove items from the queue.
// If the dispatch fails, the remaining actions are still returned alongside the
// wrapped error.
func (f *fleetGateway) dispatchCancelActions(actions []fleetapi.Action) ([]fleetapi.Action, error) {
// separate cancel actions from the actions list
cancelActions := make([]fleetapi.Action, 0, len(actions))
// Walk backwards so that removing element i does not shift elements that are
// still to be visited. As a consequence cancelActions is collected in the
// reverse of the input order.
for i := len(actions) - 1; i >= 0; i-- {
action := actions[i]
if action.Type() == fleetapi.ActionTypeCancel {
cancelActions = append(cancelActions, action)
// Removes element i by shifting the tail left within the same backing array.
actions = append(actions[:i], actions[i+1:]...)
}
}
// Dispatch cancel actions
if len(cancelActions) > 0 {
if err := f.dispatcher.Dispatch(context.Background(), f.acker, cancelActions...); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Question | Suggestion]
Do we expect to have a context to pass to Dispatch at some point? If so, I'd suggest to use context.TODO() to indicate that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ensuring proper contexts is a larger issue then what I would like to do for this PR. I've made #464 to track it

return actions, fmt.Errorf("failed to dispatch cancel actions: %w", err)
}
}
return actions, nil
}

// gatherQueuedActions drains the action queue and sorts the dequeued actions into
// two lists relative to ts: queued holds actions whose expiration is not before ts,
// expired holds actions whose expiration is strictly earlier than ts.
// Both results are nil when nothing falls into that bucket.
func (f *fleetGateway) gatherQueuedActions(ts time.Time) (queued, expired []fleetapi.Action) {
	for _, dequeued := range f.queue.DequeueActions() {
		// The error from Expiration is deliberately discarded.
		// NOTE(review): if Expiration returns a zero time.Time on error, such an
		// action is always classified as expired — confirm this is intended.
		deadline, _ := dequeued.Expiration()
		if !ts.After(deadline) {
			queued = append(queued, dequeued)
			continue
		}
		expired = append(expired, dequeued)
	}
	return queued, expired
}

func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) {
f.backoff.Reset()

Expand Down
Loading