Skip to content

Commit

Permalink
support dump scheduler cache snapshot to json file
Browse files Browse the repository at this point in the history
Signed-off-by: lowang-bh <lhui_wang@163.com>
  • Loading branch information
lowang-bh committed Dec 21, 2023
1 parent 870483f commit 111d3a1
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 6 deletions.
3 changes: 3 additions & 0 deletions cmd/scheduler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@ type ServerOption struct {
PercentageOfNodesToFind int32

NodeSelector []string
CacheDumpFileDir string
EnableCacheDumper bool
NodeWorkerThreads uint32
}

// DecryptFunc is custom function to parse ca file
type DecryptFunc func(c *ServerOption) error

// ServerOpts server options.
Expand Down Expand Up @@ -133,6 +135,7 @@ func (s *ServerOption) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&s.EnableMetrics, "enable-metrics", false, "Enable the metrics function; it is false by default")
fs.StringSliceVar(&s.NodeSelector, "node-selector", nil, "volcano only work with the labeled node, like: --node-selector=volcano.sh/role:train --node-selector=volcano.sh/role:serving")
fs.BoolVar(&s.EnableCacheDumper, "cache-dumper", true, "Enable the cache dumper, it's true by default")
fs.StringVar(&s.CacheDumpFileDir, "cache-dump-dir", "/tmp", "The target dir where the json file put at when dump cache info to json file")
fs.Uint32Var(&s.NodeWorkerThreads, "node-worker-threads", defaultNodeWorkers, "The number of threads syncing node operations.")
}

Expand Down
1 change: 1 addition & 0 deletions cmd/scheduler/app/options/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func TestAddFlags(t *testing.T) {
EnableLeaderElection: true,
LockObjectNamespace: defaultLockObjectNamespace,
NodeWorkerThreads: defaultNodeWorkers,
CacheDumpFileDir: "/tmp",
}

if !reflect.DeepEqual(expected, s) {
Expand Down
40 changes: 35 additions & 5 deletions pkg/scheduler/cache/dumper.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@
package cache

import (
"encoding/json"
"fmt"
"os"
"os/signal"
"path"
"runtime"
"strings"
"syscall"
"time"

"k8s.io/klog/v2"

Expand All @@ -32,9 +36,31 @@ import (
// for debugging purposes. Usage: run `kill -s USR2 <pid>` in the shell, where <pid>
// is the process id of the scheduler process.
type Dumper struct {
Cache Cache
Cache Cache
RootDir string // target directory for the dumped json file
}

// dumpToJSONFile marsh scheduler cache snapshot to json file
func (d *Dumper) dumpToJSONFile() {
snapshot := d.Cache.Snapshot()
name := fmt.Sprintf("snapshot-%d.json", time.Now().Unix())
fName := path.Join(d.RootDir, name)
file, err := os.OpenFile(fName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
klog.Errorf("error creating snapshot because of error creating file: %v", err)
return
}
defer file.Close()
klog.Infoln("Starting to dump info in scheduler cache to file", fName)
if err = json.NewEncoder(file).Encode(snapshot.Nodes); err != nil {
klog.Errorf("Failed to dump info in scheduler cache, json encode error: %v", err)
return
}

klog.Infoln("Successfully dump info in scheduler cache to file", fName)
}

// dumpAll prints all information to log
func (d *Dumper) dumpAll() {
snapshot := d.Cache.Snapshot()
klog.Info("Dump of nodes info in scheduler cache")
Expand Down Expand Up @@ -73,16 +99,20 @@ func (d *Dumper) printJobInfo(jobInfo *api.JobInfo) string {
}

// ListenForSignal starts a goroutine that will respond when process
// receives SIGUSER2 signal.
// receives SIGUSER1/SIGUSER2 signal.
func (d *Dumper) ListenForSignal(stopCh <-chan struct{}) {
ch := make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGUSR2)
ch1 := make(chan os.Signal, 1)
ch2 := make(chan os.Signal, 1)
signal.Notify(ch1, syscall.SIGUSR1)
signal.Notify(ch2, syscall.SIGUSR2)
go func() {
for {
select {
case <-stopCh:
return
case <-ch:
case <-ch1:
d.dumpToJSONFile()
case <-ch2:
d.dumpAll()
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func NewScheduler(config *rest.Config, opt *options.ServerOption) (*Scheduler, e
fileWatcher: watcher,
cache: cache,
schedulePeriod: opt.SchedulePeriod,
dumper: schedcache.Dumper{Cache: cache},
dumper: schedcache.Dumper{Cache: cache, RootDir: opt.CacheDumpFileDir},
}

return scheduler, nil
Expand Down

0 comments on commit 111d3a1

Please sign in to comment.