Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/ee/agent-smith/pkg/detector/filesystem.go
2501 views
1
// Copyright (c) 2024 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License.AGPL.txt in the project root for license information.
4
5
package detector
6
7
import (
8
"context"
9
"fmt"
10
"os"
11
"path/filepath"
12
"strings"
13
"sync"
14
"time"
15
16
"github.com/gitpod-io/gitpod/agent-smith/pkg/classifier"
17
"github.com/gitpod-io/gitpod/agent-smith/pkg/common"
18
"github.com/gitpod-io/gitpod/common-go/log"
19
)
20
21
// FileDetector discovers suspicious files on the node
22
type FileDetector interface {
23
// DiscoverFiles based on a relative path match given the classifier's signatures
24
DiscoverFiles(ctx context.Context) (<-chan File, error)
25
}
26
27
// File represents a file that might warrant closer inspection
28
type File struct {
29
Path string
30
Workspace *common.Workspace
31
Content []byte
32
Size int64
33
ModTime time.Time
34
}
35
36
// fileDetector scans workspace filesystems for files matching signature criteria
37
type fileDetector struct {
38
mu sync.RWMutex
39
fs chan File
40
41
config FileScanningConfig
42
classifier classifier.FileClassifier
43
lastScanTime time.Time
44
45
startOnce sync.Once
46
}
47
48
// FileScanningConfig carries the settings of the filesystem scanner.
type FileScanningConfig struct {
	// Enabled must be true for NewfileDetector to build a detector.
	Enabled bool

	// ScanInterval is the period between two scans. When left at zero,
	// NewfileDetector applies a default of 5 minutes.
	ScanInterval time.Duration

	// MaxFileSize is the largest file size in bytes that is still reported.
	// When left at zero, NewfileDetector applies a default of 1024 bytes.
	MaxFileSize int64

	// WorkingArea is the directory whose sub-directories are treated as
	// workspace directories. It is required.
	WorkingArea string
}
55
56
// Compile-time check that *fileDetector satisfies FileDetector.
var _ FileDetector = (*fileDetector)(nil)
57
58
// NewfileDetector creates a new file detector
59
func NewfileDetector(config FileScanningConfig, fsClassifier classifier.FileClassifier) (*fileDetector, error) {
60
if !config.Enabled {
61
return nil, fmt.Errorf("file scanning is disabled")
62
}
63
64
// Set defaults
65
if config.ScanInterval == 0 {
66
config.ScanInterval = 5 * time.Minute
67
}
68
if config.MaxFileSize == 0 {
69
config.MaxFileSize = 1024 // 1KB default
70
}
71
if config.WorkingArea == "" {
72
return nil, fmt.Errorf("workingArea must be specified")
73
}
74
75
return &fileDetector{
76
config: config,
77
classifier: fsClassifier,
78
lastScanTime: time.Time{}, // Zero time means never scanned
79
}, nil
80
}
81
82
func (det *fileDetector) start(ctx context.Context) {
83
fs := make(chan File, 100)
84
go func() {
85
ticker := time.NewTicker(det.config.ScanInterval)
86
defer ticker.Stop()
87
88
for {
89
select {
90
case <-ctx.Done():
91
close(fs)
92
return
93
case <-ticker.C:
94
det.scanWorkspaces(fs)
95
}
96
}
97
}()
98
99
go func() {
100
for f := range fs {
101
// Convert FilesystemFile to SuspiciousFile for compatibility
102
file := File{
103
Path: f.Path,
104
Workspace: f.Workspace,
105
Content: nil, // Content will be read by signature matcher
106
Size: f.Size,
107
ModTime: f.ModTime,
108
}
109
det.fs <- file
110
}
111
}()
112
113
log.Info("filesystem detector started")
114
}
115
116
// scanWorkspaces performs one full scan: it fetches the filesystem
// signatures, lists the workspace directories below the working area and
// scans each of them, sending matches to files. The scan is skipped with a
// log message when there are no signatures or the working area cannot be
// listed.
func (det *fileDetector) scanWorkspaces(files chan<- File) {
	sigs := det.GetFileSignatures()
	if len(sigs) == 0 {
		// Nothing to look for.
		log.Warn("no filesystem signatures configured, skipping scan")
		return
	}

	dirs, err := det.discoverWorkspaceDirectories()
	if err != nil {
		log.WithError(err).Error("failed to discover workspace directories")
		return
	}

	log.Infof("found %d workspace directories, scanning for %d filesystem signatures", len(dirs), len(sigs))

	for i := range dirs {
		det.scanWorkspaceDirectory(dirs[i], sigs, files)
	}
}
137
138
// GetFileSignatures returns signatures that should be used for filesystem scanning
139
// These are extracted from the configured classifier
140
func (det *fileDetector) GetFileSignatures() []*classifier.Signature {
141
if det.classifier == nil {
142
return nil
143
}
144
145
// Use the FileClassifier interface to get signatures
146
return det.classifier.GetFileSignatures()
147
}
148
149
// discoverWorkspaceDirectories scans the working area for workspace directories
150
func (det *fileDetector) discoverWorkspaceDirectories() ([]WorkspaceDirectory, error) {
151
entries, err := os.ReadDir(det.config.WorkingArea)
152
if err != nil {
153
return nil, fmt.Errorf("cannot read working area %s: %w", det.config.WorkingArea, err)
154
}
155
156
var workspaceDirs []WorkspaceDirectory
157
for _, entry := range entries {
158
if !entry.IsDir() {
159
continue
160
}
161
162
// Skip hidden directories and service directories (ending with -daemon)
163
name := entry.Name()
164
if strings.HasPrefix(name, ".") || strings.HasSuffix(name, "-daemon") {
165
continue
166
}
167
168
workspaceDir := WorkspaceDirectory{
169
InstanceID: name,
170
Path: filepath.Join(det.config.WorkingArea, name),
171
}
172
workspaceDirs = append(workspaceDirs, workspaceDir)
173
}
174
175
return workspaceDirs, nil
176
}
177
178
// WorkspaceDirectory describes one workspace directory found on disk.
type WorkspaceDirectory struct {
	// InstanceID is the workspace instance ID, taken from the directory name.
	InstanceID string

	// Path is the location of the directory on disk.
	Path string
}
183
184
func (det *fileDetector) scanWorkspaceDirectory(wsDir WorkspaceDirectory, signatures []*classifier.Signature, files chan<- File) {
185
// Create a minimal workspace object for this directory
186
workspace := &common.Workspace{
187
InstanceID: wsDir.InstanceID,
188
// We don't have other workspace metadata from directory scanning
189
// These would need to be populated from other sources if needed
190
}
191
192
// For each signature, check if any of its target files exist
193
for _, sig := range signatures {
194
for _, relativeFilePath := range sig.Filename {
195
matchingFiles := det.findMatchingFiles(wsDir.Path, relativeFilePath)
196
197
for _, filePath := range matchingFiles {
198
// Check if file exists and get its info
199
info, err := os.Stat(filePath)
200
if err != nil {
201
continue
202
}
203
204
// Skip directories
205
if info.IsDir() {
206
continue
207
}
208
209
// Size check
210
size := info.Size()
211
if size == 0 || size > det.config.MaxFileSize {
212
log.Warnf("File size is too large, skipping: %s", filePath)
213
continue
214
}
215
216
file := File{
217
Path: filePath,
218
Workspace: workspace,
219
Content: nil, // Content will be read by signature matcher if needed
220
Size: size,
221
ModTime: info.ModTime(),
222
}
223
224
log.Infof("Found matching file: %s (pattern: %s, signature: %s, size: %d bytes)", filePath, relativeFilePath, sig.Name, size)
225
226
select {
227
case files <- file:
228
log.Infof("File sent to channel: %s", filePath)
229
default:
230
log.Warnf("File dropped (channel full): %s", filePath)
231
}
232
}
233
}
234
}
235
}
236
237
// findMatchingFiles finds files matching a pattern (supports wildcards and relative paths)
238
func (det *fileDetector) findMatchingFiles(workspaceRoot, relativeFilePath string) []string {
239
// For wildcard relativeFilePaths, we need to search within the workspace
240
// For simplicity, only search in the root directory for now
241
// TODO: Could be extended to search subdirectories up to WorkspaceDepth
242
matches, err := filepath.Glob(filepath.Join(workspaceRoot, relativeFilePath))
243
if err != nil {
244
return nil
245
}
246
247
return matches
248
}
249
250
// DiscoverFiles starts filesystem discovery. Must not be called more than once.
251
func (det *fileDetector) DiscoverFiles(ctx context.Context) (<-chan File, error) {
252
det.mu.Lock()
253
defer det.mu.Unlock()
254
255
if det.fs != nil {
256
return nil, fmt.Errorf("already discovering files")
257
}
258
259
res := make(chan File, 100)
260
det.fs = res
261
det.startOnce.Do(func() { det.start(ctx) })
262
263
return res, nil
264
}
265
266