Skip to content

Commit

Permalink
enable snapshot extractor.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 675365359
  • Loading branch information
slowest-sloth authored and copybara-github committed Sep 26, 2024
1 parent 808b657 commit 198cdb8
Show file tree
Hide file tree
Showing 16 changed files with 305 additions and 95 deletions.
5 changes: 5 additions & 0 deletions binary/proto/proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,11 @@ func setProtoMetadata(meta any, i *spb.Inventory) {
ImageDigest: m.ImageDigest,
Runtime: m.Runtime,
InitProcessPid: int32(m.InitProcessPID),
Snapshotter: m.Snapshotter,
SnapshotKey: m.SnapshotKey,
LowerDir: m.LowerDir,
UpperDir: m.UpperDir,
WorkDir: m.WorkDir,
},
}
case *ctrdruntime.Metadata:
Expand Down
10 changes: 10 additions & 0 deletions binary/proto/proto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,11 @@ func TestScanResultToProto(t *testing.T) {
ImageDigest: "sha256:b1455e1c4fcc5ea1023c9e3b584cd84b64eb920e332feff690a2829696e379e7",
Runtime: "io.containerd.runc.v2",
InitProcessPID: 8915,
Snapshotter: "overlayfs",
SnapshotKey: "abcweraweroiuojgawer1",
LowerDir: "/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1/fs",
UpperDir: "/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/4/fs",
WorkDir: "/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/4/work",
},
Locations: []string{"/file4"},
Extractor: &ctrdfs.Extractor{},
Expand All @@ -470,6 +475,11 @@ func TestScanResultToProto(t *testing.T) {
ImageDigest: "sha256:b1455e1c4fcc5ea1023c9e3b584cd84b64eb920e332feff690a2829696e379e7",
Runtime: "io.containerd.runc.v2",
InitProcessPid: 8915,
Snapshotter: "overlayfs",
SnapshotKey: "abcweraweroiuojgawer1",
LowerDir: "/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1/fs",
UpperDir: "/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/4/fs",
WorkDir: "/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/4/work",
},
},
Locations: []string{"/file4"},
Expand Down
5 changes: 5 additions & 0 deletions binary/proto/scan_result.proto
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,11 @@ message ContainerdContainerMetadata {
string image_digest = 3;
string runtime = 4;
int32 init_process_pid = 5;
string snapshotter = 6;
string snapshot_key = 7;
string lower_dir = 8;
string upper_dir = 9;
string work_dir = 10;
}

message ContainerdRuntimeContainerMetadata {
Expand Down
10 changes: 8 additions & 2 deletions binary/proto/scan_result_go_proto/scan_result.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

229 changes: 192 additions & 37 deletions extractor/filesystem/containers/containerd/extractor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,17 @@ const (
// Name is the unique name of this extractor.
Name = "containers/containerd"

// defaultMaxMetaDBFileSize is the maximum metadb size .
// If Extract gets a bigger metadb file, it will return an error.
defaultMaxMetaDBFileSize = 500 * units.MiB
// defaultMaxFileSize is the maximum file size.
// If Extract gets a bigger file, it will return an error.
defaultMaxFileSize = 500 * units.MiB

// Prefix of the path for container's grpc container status file, used to collect pid for a container.
criPluginStatusFilePrefix = "var/lib/containerd/io.containerd.grpc.v1.cri/containers/"

// Prefix of the path for runc state files, used to check if a container is running by runc.
runcStateFilePrefix = "run/containerd/runc/"
// Prefix of the path for snapshotter overlayfs snapshots folders.
overlayfsSnapshotsPath = "var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots"
// The path for the metadata.db file which will be used to parse the mapping between folders and container's mount points.
snapshotterMetadataDBPath = "var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/metadata.db"

// Prefix of the path for runhcs state files, used to check if a container is running by runhcs.
runhcsStateFilePrefix = "ProgramData/containerd/state/io.containerd.runtime.v2.task/"
Expand All @@ -65,7 +70,7 @@ type Config struct {
// DefaultConfig returns the default configuration for the containerd extractor.
func DefaultConfig() Config {
return Config{
MaxMetaDBFileSize: defaultMaxMetaDBFileSize,
MaxMetaDBFileSize: defaultMaxFileSize,
}
}

Expand Down Expand Up @@ -97,7 +102,7 @@ func (e Extractor) Version() int { return 0 }
// Requirements of the extractor.
func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{DirectFS: true} }

// FileRequired returns true if the specified file matches containerd metadb file pattern.
// FileRequired returns true if the specified file matches containerd metaDB file pattern.
func (e Extractor) FileRequired(path string, _ fs.FileInfo) bool {
// On Windows the metadb file is expected to be located at the
// <scanRoot>/ProgramData/containerd/root/io.containerd.metadata.v1.bolt/meta.db path.
Expand Down Expand Up @@ -128,7 +133,21 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]

defer metaDB.Close()

ctrMetadata, err := containersFromMetaDB(ctx, metaDB, input.Root)
var snapshotsMetadata []SnapshotMetadata
// If it's linux, parse the default overlayfs snapshotter metadata.db file.
if input.Path == "var/lib/containerd/io.containerd.metadata.v1.bolt/meta.db" {
fullMetadataDBPath := filepath.Join(input.Root, snapshotterMetadataDBPath)
safeToOpen, err := fileSizeCheck(fullMetadataDBPath, e.maxMetaDBFileSize)
if err != nil {
return inventory, fmt.Errorf("Could not read the containerd metadb file: %v", err)
}
snapshotsMetadata, err = snapshotsMetadataFromDB(fullMetadataDBPath, safeToOpen, e.maxMetaDBFileSize, "overlayfs")
if err != nil {
return inventory, fmt.Errorf("Could not collect snapshots metadata from DB: %v", err)
}
}

ctrMetadata, err := containersFromMetaDB(ctx, metaDB, input.Root, snapshotsMetadata)
if err != nil {
log.Errorf("Could not get container inventory from the containerd metadb file: %v", err)
return inventory, err
Expand All @@ -143,10 +162,22 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]
}
inventory = append(inventory, pkg)
}

return inventory, nil
}

// fileSizeCheck reports whether the file at the given path can be stat'ed and
// is no larger than maxFileSize bytes.
//
// It returns (true, nil) when the file exists and its size is at most
// maxFileSize. Otherwise it returns false together with an error describing
// why: either the stat failure (wrapped, so callers can still match it with
// errors.Is / errors.As, e.g. against os.ErrNotExist) or the size overrun.
func fileSizeCheck(path string, maxFileSize int64) (safeToOpen bool, err error) {
	info, err := os.Stat(path)
	if err != nil {
		// %w keeps the underlying *PathError reachable for callers.
		return false, fmt.Errorf("Can't get the file info for %v because %w", path, err)
	}
	if size := info.Size(); size > maxFileSize {
		return false, fmt.Errorf("File %s is too large: %d", path, size)
	}
	return true, nil
}

// namespacesFromMetaDB returns the list of namespaces stored in the containerd metaDB file.
func namespacesFromMetaDB(ctx context.Context, metaDB *bolt.DB) ([]string, error) {
var namespaces []string

Expand All @@ -167,9 +198,8 @@ func namespacesFromMetaDB(ctx context.Context, metaDB *bolt.DB) ([]string, error
return namespaces, nil
}

func containersFromMetaDB(ctx context.Context, metaDB *bolt.DB, scanRoot string) ([]Metadata, error) {
func containersFromMetaDB(ctx context.Context, metaDB *bolt.DB, scanRoot string, snapshotsMetadata []SnapshotMetadata) ([]Metadata, error) {
var containersMetadata []Metadata

// Get list of namespaces from the containerd metadb file.
nss, err := namespacesFromMetaDB(ctx, metaDB)
if err != nil {
Expand Down Expand Up @@ -199,21 +229,140 @@ func containersFromMetaDB(ctx context.Context, metaDB *bolt.DB, scanRoot string)
if err != nil {
log.Errorf("Could not find the image for container %v, error: %v", id, err)
}

var lowerDir, upperDir, workDir string
// If the filesystem is overlayfs, then parse overlayfs metadata.db
if ctr.Snapshotter == "overlayfs" {
lowerDir, upperDir, workDir = collectDirs(scanRoot, snapshotsMetadata, ctr.SnapshotKey)
}

containersMetadata = append(containersMetadata,
Metadata{Namespace: ns,
ImageName: img.Name,
ImageDigest: img.Target.Digest.String(),
Runtime: ctr.Runtime.Name,
InitProcessPID: initPID})
InitProcessPID: initPID,
Snapshotter: ctr.Snapshotter,
SnapshotKey: ctr.SnapshotKey,
LowerDir: lowerDir,
UpperDir: upperDir,
WorkDir: workDir})
}
}
return containersMetadata, nil
}

// digestSnapshotInfoMapping indexes the given snapshots by the final
// "/"-separated segment of their Digest field. A Digest without any "/" is
// used whole. When several snapshots share the same final segment, the one
// that appears last in the slice wins.
func digestSnapshotInfoMapping(snapshotsMetadata []SnapshotMetadata) map[string]SnapshotMetadata {
	byShortDigest := make(map[string]SnapshotMetadata, len(snapshotsMetadata))
	for i := range snapshotsMetadata {
		entry := snapshotsMetadata[i]
		key := entry.Digest
		if cut := strings.LastIndex(key, "/"); cut >= 0 {
			key = key[cut+1:]
		}
		byShortDigest[key] = entry
	}
	return byShortDigest
}

// collectDirs builds the overlayfs lowerdir, upperdir and workdir paths for the
// container snapshot identified by snapshotKey.
//
//   - lowerDir is the ":"-joined list of "<scanRoot>/<overlayfsSnapshotsPath>/<id>/fs"
//     directories for every ID returned by getParentSnapshotIDByDigest, in the
//     order that function returns them.
//   - upperDir and workDir are the "fs" and "work" directories of the first
//     snapshot whose Digest contains snapshotKey.
//
// Any value that cannot be determined is returned as the empty string. An empty
// snapshotKey yields three empty strings: without this guard the substring
// match below would succeed on the very first snapshot and report directories
// that belong to an unrelated container.
func collectDirs(scanRoot string, snapshotsMetadata []SnapshotMetadata, snapshotKey string) (string, string, string) {
	if snapshotKey == "" {
		return "", "", ""
	}

	snapshotsRoot := filepath.Join(scanRoot, overlayfsSnapshotsPath)

	parentSnapshotIDs := getParentSnapshotIDByDigest(snapshotsMetadata, snapshotKey, nil)
	lowerDirs := make([]string, 0, len(parentSnapshotIDs))
	for _, parentSnapshotID := range parentSnapshotIDs {
		log.Infof("parentSnapshotID: %d", parentSnapshotID)
		lowerDirs = append(lowerDirs, filepath.Join(snapshotsRoot, strconv.Itoa(parentSnapshotID), "fs"))
	}
	// Same layout as the lowerdir= mount option, e.g.
	// <snapshots>/15/fs:<snapshots>/12/fs:<snapshots>/8/fs:<snapshots>/5/fs
	lowerDir := strings.Join(lowerDirs, ":")

	var upperDir, workDir string
	for _, snapshotMetadata := range snapshotsMetadata {
		// NOTE(review): this is a substring match, so a key that is a substring of
		// another snapshot's Digest also matches; the first hit wins.
		if strings.Contains(snapshotMetadata.Digest, snapshotKey) {
			snapshotDir := filepath.Join(snapshotsRoot, strconv.Itoa(snapshotMetadata.ID))
			upperDir = filepath.Join(snapshotDir, "fs")
			workDir = filepath.Join(snapshotDir, "work")
			break
		}
	}
	return lowerDir, upperDir, workDir
}

// getParentSnapshotIDByDigest walks the parent chain of the snapshot whose
// shortened digest (the last "/"-separated segment, see
// digestSnapshotInfoMapping) equals digest, and appends to parentIDList the ID
// of every visited snapshot whose shortened digest contains "sha256:".
//
// The starting snapshot is only appended if its own digest contains "sha256:",
// so for a plain container snapshot key the result holds the IDs of its
// ancestors only, nearest ancestor first.
//
// The walk stops when a snapshot has no parent, when a digest is not present
// in snapshotsMetadata (nothing is appended for it), or when a digest repeats,
// which guards against cyclic parent links in a corrupted metadata.db.
func getParentSnapshotIDByDigest(snapshotsMetadata []SnapshotMetadata, digest string, parentIDList []int) []int {
	// Build the lookup table once instead of once per chain element.
	byDigest := digestSnapshotInfoMapping(snapshotsMetadata)
	visited := make(map[string]struct{})

	for {
		if _, seen := visited[digest]; seen {
			return parentIDList
		}
		visited[digest] = struct{}{}

		snapshot, ok := byDigest[digest]
		if !ok {
			return parentIDList
		}
		if strings.Contains(digest, "sha256:") {
			parentIDList = append(parentIDList, snapshot.ID)
		}
		if snapshot.Parent == "" {
			return parentIDList
		}
		// Parent uses the same "<prefix>/<digest>" form as Digest; keep the last segment.
		digest = snapshot.Parent[strings.LastIndex(snapshot.Parent, "/")+1:]
	}
}

// snapshotsMetadataFromDB reads the snapshot records stored under the
// v1 -> snapshots bucket of the snapshotter metadata.db file at
// fullMetadataDBPath.
//
// Parameters:
//   - fullMetadataDBPath: path of the bolt database file to open.
//   - safeToOpen: ignored on input; the size check is always re-run here and its
//     result replaces the argument. The parameter is kept so existing callers
//     keep compiling.
//   - maxMetaDBFileSize: files larger than this many bytes are rejected.
//   - fileSystemDriver: stored verbatim in every returned SnapshotMetadata.
//
// It returns an error when the file cannot be stat'ed or is too large, when the
// database cannot be opened within one second, or when the v1 or snapshots
// bucket is missing.
func snapshotsMetadataFromDB(fullMetadataDBPath string, safeToOpen bool, maxMetaDBFileSize int64, fileSystemDriver string) ([]SnapshotMetadata, error) {
	// Check if the file is valid to be opened, and make sure it's not too large.
	safeToOpen, err := fileSizeCheck(fullMetadataDBPath, maxMetaDBFileSize)
	if err != nil {
		return nil, fmt.Errorf("Could not read the containerd metadb file: %w", err)
	}
	if !safeToOpen {
		return nil, nil
	}

	metadataDB, err := bolt.Open(fullMetadataDBPath, 0444, &bolt.Options{Timeout: 1 * time.Second})
	if err != nil {
		return nil, fmt.Errorf("Could not read the containerd metadb file: %w", err)
	}
	defer metadataDB.Close()

	// Snapshots metadata extracted from the metadata.db file.
	var snapshotsMetadata []SnapshotMetadata
	err = metadataDB.View(func(tx *bolt.Tx) error {
		// Resolve v1 -> snapshots one level at a time: looking up a nested bucket
		// on a nil parent would dereference a nil pointer.
		v1Bucket := tx.Bucket([]byte("v1"))
		if v1Bucket == nil {
			return fmt.Errorf("Could not find the v1 bucket in the metadata.db file")
		}
		snapshotsMetadataBucket := v1Bucket.Bucket([]byte("snapshots"))
		if snapshotsMetadataBucket == nil {
			return fmt.Errorf("Could not find the snapshots bucket in the metadata.db file")
		}

		// Collect the names of the nested buckets; each one stores the information
		// of a single snapshot and is normally named after its digest.
		var snapshotsBucketByDigest []string
		err := snapshotsMetadataBucket.ForEach(func(k []byte, v []byte) error {
			// A nil value means the key refers to a nested bucket rather than a
			// plain key/value pair.
			if v == nil {
				snapshotsBucketByDigest = append(snapshotsBucketByDigest, string(k))
			}
			return nil
		})
		if err != nil {
			// Best effort: continue with whatever names were collected.
			log.Errorf("Not able to grab the names of the snapshot buckets: %v", err)
		}

		// Store the important info of the snapshots into SnapshotMetadata structs.
		snapshotsMetadata = snapshotMetadataFromSnapshotsBuckets(tx, snapshotsBucketByDigest, snapshotsMetadata, fileSystemDriver)
		return nil
	})
	if err != nil {
		log.Errorf("Not able to view the db: %v", err)
		return nil, err
	}
	return snapshotsMetadata, nil
}

// snapshotMetadataFromSnapshotsBuckets reads, for every bucket name in
// snapshotsBucketByDigest, the "id", "kind" and "parent" values stored in the
// v1 -> snapshots -> <name> bucket and appends one SnapshotMetadata per bucket
// to snapshotsMetadata, which is then returned.
//
// Buckets that are missing, or whose "id" or "kind" value is absent or
// malformed, are logged and skipped rather than causing a panic. If the
// v1 or snapshots bucket itself is missing, snapshotsMetadata is returned
// unchanged.
func snapshotMetadataFromSnapshotsBuckets(tx *bolt.Tx, snapshotsBucketByDigest []string, snapshotsMetadata []SnapshotMetadata, fileSystemDriver string) []SnapshotMetadata {
	// Resolve the parent buckets once, checking each level for nil.
	v1Bucket := tx.Bucket([]byte("v1"))
	if v1Bucket == nil {
		return snapshotsMetadata
	}
	snapshotsBucket := v1Bucket.Bucket([]byte("snapshots"))
	if snapshotsBucket == nil {
		return snapshotsMetadata
	}

	for _, shaDigest := range snapshotsBucketByDigest {
		// grab the bucket by digest.
		snapshotMetadataBucket := snapshotsBucket.Bucket([]byte(shaDigest))
		if snapshotMetadataBucket == nil {
			log.Errorf("Could not find the snapshot bucket %v in the metadata.db file", shaDigest)
			continue
		}
		// This id is the corresponding folder name in overlayfs/snapshots folder.
		id, ok := decodeSnapshotID(snapshotMetadataBucket.Get([]byte("id")))
		// The status of the snapshot, stored as a single byte.
		rawKind := snapshotMetadataBucket.Get([]byte("kind"))
		if !ok || len(rawKind) == 0 {
			log.Errorf("Snapshot bucket %v has a missing or malformed id or kind", shaDigest)
			continue
		}
		kind := int(rawKind[0])
		// The parent snapshot of the snapshot; empty when there is none.
		parent := string(snapshotMetadataBucket.Get([]byte("parent")))
		snapshotsMetadata = append(snapshotsMetadata, SnapshotMetadata{Digest: shaDigest, ID: id, Kind: kind, Parent: parent, FilesystemType: fileSystemDriver})
	}
	return snapshotsMetadata
}

// decodeSnapshotID decodes a snapshot id stored in unsigned-varint form (the
// encoding produced by encoding/binary.PutUvarint, which containerd's bolt
// snapshot store uses for the "id" key). Reading only the first byte is correct
// for ids below 256 but wrong above that, so all continuation bytes are folded
// in here. It reports false for empty, truncated or over-long input.
//
// The decoding is written out by hand so this block does not depend on an
// import the file may not already have.
func decodeSnapshotID(raw []byte) (int, bool) {
	var value uint64
	for i, b := range raw {
		if i >= 9 {
			// More than 63 payload bits cannot be a valid non-negative int.
			return 0, false
		}
		value |= uint64(b&0x7f) << (7 * uint(i))
		if b < 0x80 {
			return int(value), true
		}
	}
	return 0, false
}

func containerInitPid(scanRoot string, runtimeName string, namespace string, id string) int {
// A typical Linux case.
if runtimeName == "io.containerd.runc.v2" {
return runcInitPid(scanRoot, runtimeName, namespace, id)
return runcInitPid(scanRoot, runtimeName, id)
}

// A typical Windows case.
Expand All @@ -224,38 +373,44 @@ func containerInitPid(scanRoot string, runtimeName string, namespace string, id
return -1
}

func runcInitPid(scanRoot string, runtimeName string, namespace string, id string) int {
// If a container is running by runc, the init pid is stored in the runc state.json file.
// state.json file is located at the
// <scanRoot>/<runcStateFilePrefix>/<namespace_name>/<container_id>/state.json path.
statePath := filepath.Join(scanRoot, runcStateFilePrefix, namespace, id, "state.json")
if _, err := os.Stat(statePath); err != nil {
log.Info("File state.json does not exists for container %v, error: %v", id, err)
func runcInitPid(scanRoot string, runtimeName string, id string) int {
// If a container is running by runc, the init pid is stored in the grpc status file.
// status file is located at the
// <scanRoot>/<criPluginStatusFilePrefix>/<container_id>/state.json path.
statusPath := filepath.Join(scanRoot, criPluginStatusFilePrefix, id, "status")
if _, err := os.Stat(statusPath); err != nil {
log.Info("File status does not exists for container %v, error: %v", id, err)
return -1
}

stateContent, err := os.ReadFile(statePath)
safeToOpen, err := fileSizeCheck(statusPath, defaultMaxFileSize)
if err != nil {
log.Errorf("Could not read for %s state.json for container: %v, error: %v", id, err)
return -1
}
var runcState map[string]*json.RawMessage
if err := json.Unmarshal([]byte(stateContent), &runcState); err != nil {
log.Errorf("Can't unmarshal state.json for container %v , error: %v", id, err)
return -1
}
runcInitPID := runcState["init_process_pid"]
if runcInitPID == nil {
log.Errorf("Can't find field init_process_pid filed in state.json for container %v", id)
return -1
}

var initPID int
if err := json.Unmarshal(*runcInitPID, &initPID); err != nil {
log.Errorf("Can't find field init_process_pid in state.json for container %v, error: %v", id, err)
return -1
}
initPID := -1

if safeToOpen {
statusContent, err := os.ReadFile(statusPath)
if err != nil {
log.Errorf("Could not read for %s status for container: %v, error: %v", id, err)
return -1
}
var grpcContainerStatus map[string]*json.RawMessage
if err := json.Unmarshal([]byte(statusContent), &grpcContainerStatus); err != nil {
log.Errorf("Can't unmarshal status for container %v , error: %v", id, err)
return -1
}
statusinitPID := grpcContainerStatus["Pid"]
if statusinitPID == nil {
log.Errorf("Can't find field pid filed in status for container %v", id)
return -1
}
if err := json.Unmarshal(*statusinitPID, &initPID); err != nil {
log.Errorf("Can't find field pid in status for container %v, error: %v", id, err)
return -1
}
}
return initPID
}

Expand Down
Loading

0 comments on commit 198cdb8

Please sign in to comment.