Path: blob/main/components/ee/agent-smith/pkg/detector/filesystem.go
2501 views
// Copyright (c) 2024 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34package detector56import (7"context"8"fmt"9"os"10"path/filepath"11"strings"12"sync"13"time"1415"github.com/gitpod-io/gitpod/agent-smith/pkg/classifier"16"github.com/gitpod-io/gitpod/agent-smith/pkg/common"17"github.com/gitpod-io/gitpod/common-go/log"18)1920// FileDetector discovers suspicious files on the node21type FileDetector interface {22// DiscoverFiles based on a relative path match given the classifier's signatures23DiscoverFiles(ctx context.Context) (<-chan File, error)24}2526// File represents a file that might warrant closer inspection27type File struct {28Path string29Workspace *common.Workspace30Content []byte31Size int6432ModTime time.Time33}3435// fileDetector scans workspace filesystems for files matching signature criteria36type fileDetector struct {37mu sync.RWMutex38fs chan File3940config FileScanningConfig41classifier classifier.FileClassifier42lastScanTime time.Time4344startOnce sync.Once45}4647// FileScanningConfig holds configuration for file scanning48type FileScanningConfig struct {49Enabled bool50ScanInterval time.Duration51MaxFileSize int6452WorkingArea string53}5455var _ FileDetector = &fileDetector{}5657// NewfileDetector creates a new file detector58func NewfileDetector(config FileScanningConfig, fsClassifier classifier.FileClassifier) (*fileDetector, error) {59if !config.Enabled {60return nil, fmt.Errorf("file scanning is disabled")61}6263// Set defaults64if config.ScanInterval == 0 {65config.ScanInterval = 5 * time.Minute66}67if config.MaxFileSize == 0 {68config.MaxFileSize = 1024 // 1KB default69}70if config.WorkingArea == "" {71return nil, fmt.Errorf("workingArea must be specified")72}7374return &fileDetector{75config: config,76classifier: fsClassifier,77lastScanTime: time.Time{}, // Zero time means never scanned78}, nil79}8081func (det *fileDetector) start(ctx context.Context) {82fs := make(chan File, 100)83go func() {84ticker := time.NewTicker(det.config.ScanInterval)85defer ticker.Stop()8687for {88select {89case <-ctx.Done():90close(fs)91return92case <-ticker.C:93det.scanWorkspaces(fs)94}95}96}()9798go func() {99for f := range fs {100// Convert FilesystemFile to SuspiciousFile for compatibility101file := File{102Path: f.Path,103Workspace: f.Workspace,104Content: nil, // Content will be read by signature matcher105Size: f.Size,106ModTime: f.ModTime,107}108det.fs <- file109}110}()111112log.Info("filesystem detector started")113}114115func (det *fileDetector) scanWorkspaces(files chan<- File) {116// Get filesystem signatures to know what files to look for117filesystemSignatures := det.GetFileSignatures()118if len(filesystemSignatures) == 0 {119log.Warn("no filesystem signatures configured, skipping scan")120return121}122123// Scan working area directory for workspace directories124workspaceDirs, err := det.discoverWorkspaceDirectories()125if err != nil {126log.WithError(err).Error("failed to discover workspace directories")127return128}129130log.Infof("found %d workspace directories, scanning for %d filesystem signatures", len(workspaceDirs), len(filesystemSignatures))131132for _, wsDir := range workspaceDirs {133det.scanWorkspaceDirectory(wsDir, filesystemSignatures, files)134}135}136137// GetFileSignatures returns signatures that should be used for filesystem scanning138// These are extracted from the configured classifier139func (det *fileDetector) GetFileSignatures() []*classifier.Signature {140if det.classifier == nil {141return nil142}143144// Use the FileClassifier interface to get signatures145return det.classifier.GetFileSignatures()146}147148// discoverWorkspaceDirectories scans the working area for workspace directories149func (det *fileDetector) discoverWorkspaceDirectories() ([]WorkspaceDirectory, error) {150entries, err := os.ReadDir(det.config.WorkingArea)151if err != nil {152return nil, fmt.Errorf("cannot read working area %s: %w", det.config.WorkingArea, err)153}154155var workspaceDirs []WorkspaceDirectory156for _, entry := range entries {157if !entry.IsDir() {158continue159}160161// Skip hidden directories and service directories (ending with -daemon)162name := entry.Name()163if strings.HasPrefix(name, ".") || strings.HasSuffix(name, "-daemon") {164continue165}166167workspaceDir := WorkspaceDirectory{168InstanceID: name,169Path: filepath.Join(det.config.WorkingArea, name),170}171workspaceDirs = append(workspaceDirs, workspaceDir)172}173174return workspaceDirs, nil175}176177// WorkspaceDirectory represents a workspace directory on disk178type WorkspaceDirectory struct {179InstanceID string180Path string181}182183func (det *fileDetector) scanWorkspaceDirectory(wsDir WorkspaceDirectory, signatures []*classifier.Signature, files chan<- File) {184// Create a minimal workspace object for this directory185workspace := &common.Workspace{186InstanceID: wsDir.InstanceID,187// We don't have other workspace metadata from directory scanning188// These would need to be populated from other sources if needed189}190191// For each signature, check if any of its target files exist192for _, sig := range signatures {193for _, relativeFilePath := range sig.Filename {194matchingFiles := det.findMatchingFiles(wsDir.Path, relativeFilePath)195196for _, filePath := range matchingFiles {197// Check if file exists and get its info198info, err := os.Stat(filePath)199if err != nil {200continue201}202203// Skip directories204if info.IsDir() {205continue206}207208// Size check209size := info.Size()210if size == 0 || size > det.config.MaxFileSize {211log.Warnf("File size is too large, skipping: %s", filePath)212continue213}214215file := File{216Path: filePath,217Workspace: workspace,218Content: nil, // Content will be read by signature matcher if needed219Size: size,220ModTime: info.ModTime(),221}222223log.Infof("Found matching file: %s (pattern: %s, signature: %s, size: %d bytes)", filePath, relativeFilePath, sig.Name, size)224225select {226case files <- file:227log.Infof("File sent to channel: %s", filePath)228default:229log.Warnf("File dropped (channel full): %s", filePath)230}231}232}233}234}235236// findMatchingFiles finds files matching a pattern (supports wildcards and relative paths)237func (det *fileDetector) findMatchingFiles(workspaceRoot, relativeFilePath string) []string {238// For wildcard relativeFilePaths, we need to search within the workspace239// For simplicity, only search in the root directory for now240// TODO: Could be extended to search subdirectories up to WorkspaceDepth241matches, err := filepath.Glob(filepath.Join(workspaceRoot, relativeFilePath))242if err != nil {243return nil244}245246return matches247}248249// DiscoverFiles starts filesystem discovery. Must not be called more than once.250func (det *fileDetector) DiscoverFiles(ctx context.Context) (<-chan File, error) {251det.mu.Lock()252defer det.mu.Unlock()253254if det.fs != nil {255return nil, fmt.Errorf("already discovering files")256}257258res := make(chan File, 100)259det.fs = res260det.startOnce.Do(func() { det.start(ctx) })261262return res, nil263}264265266