Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/ee/agent-smith/pkg/classifier/classifier.go
2501 views
1
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License.AGPL.txt in the project root for license information.
4
5
package classifier
6
7
import (
8
"errors"
9
"fmt"
10
"io"
11
"io/fs"
12
"os"
13
"regexp"
14
"strings"
15
16
"github.com/gitpod-io/gitpod/agent-smith/pkg/common"
17
"github.com/gitpod-io/gitpod/common-go/log"
18
"github.com/prometheus/client_golang/prometheus"
19
"github.com/sirupsen/logrus"
20
)
21
22
// Names of the classifier kinds. They are stored in Classification.Classifier
// so that a result can be traced back to the classifier that produced it.
const (
	// ClassifierCommandline is the name reported by the commandline classifier.
	ClassifierCommandline string = "commandline"
	// ClassifierComposite is the name (and result prefix) of the composite classifier.
	ClassifierComposite string = "composite"
	// ClassifierSignature is the name reported by the signature classifier.
	ClassifierSignature string = "signature"
	// ClassifierGraded is the name (and result prefix) of the graded classifier.
	ClassifierGraded string = "graded"
)
28
29
// Classification is the result a classifier returns for a process or file.
type Classification struct {
	// Level is the severity of the match; LevelNoMatch means nothing matched.
	Level Level
	// Classifier is the name of the classifier which produced this result.
	Classifier string
	// Message describes what matched. It is empty on the shared no-match results.
	Message string
}

// Level is the severity attached to a Classification.
type Level string
36
37
const (
38
LevelNoMatch Level = "no-match"
39
LevelBarely Level = Level(common.SeverityBarely)
40
LevelAudit Level = Level(common.SeverityAudit)
41
LevelVery Level = Level(common.SeverityVery)
42
)
43
44
// ProcessClassifier matches a process against a set of criteria
45
type ProcessClassifier interface {
46
prometheus.Collector
47
48
Matches(executable string, cmdline []string) (*Classification, error)
49
}
50
51
// FileClassifier matches filesystem files against signatures
52
type FileClassifier interface {
53
MatchesFile(filePath string) (*Classification, error)
54
GetFileSignatures() []*Signature
55
}
56
57
func NewCommandlineClassifier(name string, level Level, allowList []string, blockList []string) (*CommandlineClassifier, error) {
58
al := make([]*regexp.Regexp, 0, len(allowList))
59
for _, a := range allowList {
60
r, err := regexp.Compile(a)
61
if err != nil {
62
return nil, fmt.Errorf("cannot compile %s: %w", a, err)
63
}
64
al = append(al, r)
65
}
66
67
return &CommandlineClassifier{
68
DefaultLevel: level,
69
AllowList: al,
70
BlockList: blockList,
71
72
allowListHitTotal: prometheus.NewCounter(prometheus.CounterOpts{
73
Namespace: "gitpod_agent_smith",
74
Subsystem: "classifier_commandline",
75
Name: "allowlist_hit_total",
76
Help: "total count of allowlist hits",
77
ConstLabels: prometheus.Labels{
78
"classifier_name": name,
79
},
80
}),
81
blocklistHitTotal: prometheus.NewCounter(prometheus.CounterOpts{
82
Namespace: "gitpod_agent_smith",
83
Subsystem: "classifier_commandline",
84
Name: "blocklist_hit_total",
85
Help: "total count of blocklist hits",
86
ConstLabels: prometheus.Labels{
87
"classifier_name": name,
88
},
89
}),
90
}, nil
91
}
92
93
// CommandlineClassifier looks at the commandline of a process
94
type CommandlineClassifier struct {
95
DefaultLevel Level
96
AllowList []*regexp.Regexp
97
BlockList []string
98
99
allowListHitTotal prometheus.Counter
100
blocklistHitTotal prometheus.Counter
101
}
102
103
var _ ProcessClassifier = &CommandlineClassifier{}
104
105
var clNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierCommandline}
106
107
func (cl *CommandlineClassifier) Matches(executable string, cmdline []string) (*Classification, error) {
108
for _, pattern := range cl.AllowList {
109
if pattern.MatchString(executable) || pattern.MatchString(fmt.Sprintf("%v", cmdline)) {
110
cl.allowListHitTotal.Inc()
111
return clNoMatch, nil
112
}
113
}
114
115
for _, b := range cl.BlockList {
116
if strings.Contains(executable, b) || strings.Contains(strings.Join(cmdline, "|"), b) {
117
cl.blocklistHitTotal.Inc()
118
return &Classification{
119
Level: cl.DefaultLevel,
120
Classifier: ClassifierCommandline,
121
Message: fmt.Sprintf("matched \"%s\"", b),
122
}, nil
123
}
124
}
125
126
return clNoMatch, nil
127
}
128
129
// Describe implements prometheus.Collector by forwarding to the allow list
// and block list hit counters, in that order.
func (cl *CommandlineClassifier) Describe(d chan<- *prometheus.Desc) {
	for _, counter := range []prometheus.Counter{cl.allowListHitTotal, cl.blocklistHitTotal} {
		counter.Describe(d)
	}
}
133
134
// Collect implements prometheus.Collector by forwarding to the allow list
// and block list hit counters, in that order.
func (cl *CommandlineClassifier) Collect(m chan<- prometheus.Metric) {
	for _, counter := range []prometheus.Counter{cl.allowListHitTotal, cl.blocklistHitTotal} {
		counter.Collect(m)
	}
}
138
139
func NewSignatureMatchClassifier(name string, defaultLevel Level, sig []*Signature) *SignatureMatchClassifier {
140
return &SignatureMatchClassifier{
141
Signatures: sig,
142
DefaultLevel: defaultLevel,
143
processMissTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
144
Namespace: "gitpod_agent_smith",
145
Subsystem: "classifier_signature",
146
Name: "process_miss_total",
147
Help: "total count of process executable misses",
148
ConstLabels: prometheus.Labels{
149
"classifier_name": name,
150
},
151
}, []string{"reason"}),
152
signatureHitTotal: prometheus.NewCounter(prometheus.CounterOpts{
153
Namespace: "gitpod_agent_smith",
154
Subsystem: "classifier_signature",
155
Name: "signature_hit_total",
156
Help: "total count of process executable signature hits",
157
ConstLabels: prometheus.Labels{
158
"classifier_name": name,
159
},
160
}),
161
}
162
}
163
164
// Values of the "reason" label on the process miss metric.
const (
	// processMissNotFound is the reason we use on the process miss metric when
	// either the process itself or its executable cannot be found.
	processMissNotFound = "not_found"
	// processMissPermissionDenied is the reason used when opening the
	// executable fails with a permission error.
	processMissPermissionDenied = "permission_denied"
	// processMissOther is the reason used for every other failure to open the executable.
	processMissOther = "other"
)
171
172
// SignatureMatchClassifier matches against binary signatures
173
type SignatureMatchClassifier struct {
174
Signatures []*Signature
175
DefaultLevel Level
176
177
processMissTotal *prometheus.CounterVec
178
signatureHitTotal prometheus.Counter
179
}
180
181
var _ ProcessClassifier = &SignatureMatchClassifier{}
182
var _ FileClassifier = &SignatureMatchClassifier{}
183
184
var sigNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierSignature}
185
186
func (sigcl *SignatureMatchClassifier) Matches(executable string, cmdline []string) (c *Classification, err error) {
187
r, err := os.Open(executable)
188
if err != nil {
189
var reason string
190
if errors.Is(err, fs.ErrNotExist) {
191
reason = processMissNotFound
192
} else if errors.Is(err, os.ErrPermission) {
193
reason = processMissPermissionDenied
194
} else {
195
reason = processMissOther
196
}
197
sigcl.processMissTotal.WithLabelValues(reason).Inc()
198
log.WithFields(logrus.Fields{
199
"executable": executable,
200
"cmdline": cmdline,
201
"reason": reason,
202
}).WithError(err).Debug("signature classification miss")
203
return sigNoMatch, nil
204
}
205
defer r.Close()
206
207
var serr error
208
209
src := SignatureReadCache{
210
Reader: r,
211
}
212
for _, sig := range sigcl.Signatures {
213
match, err := sig.Matches(&src)
214
if match {
215
sigcl.signatureHitTotal.Inc()
216
return &Classification{
217
Level: sigcl.DefaultLevel,
218
Classifier: ClassifierSignature,
219
Message: fmt.Sprintf("matches %s", sig.Name),
220
}, nil
221
}
222
if err != nil {
223
serr = err
224
}
225
}
226
if serr != nil {
227
return nil, err
228
}
229
230
return sigNoMatch, nil
231
}
232
233
// MatchesFile checks if a filesystem file matches any filesystem signatures
234
func (sigcl *SignatureMatchClassifier) MatchesFile(filePath string) (c *Classification, err error) {
235
filesystemSignatures := sigcl.GetFileSignatures()
236
237
if len(filesystemSignatures) == 0 {
238
return sigNoMatch, nil
239
}
240
241
// Skip filename matching - the filesystem detector already filtered files
242
// based on signature filename patterns, so any file that reaches here
243
// should be checked for content matching against all filesystem signatures
244
matchingSignatures := filesystemSignatures
245
246
// Open file for signature matching
247
r, err := os.Open(filePath)
248
if err != nil {
249
var reason string
250
if errors.Is(err, fs.ErrNotExist) {
251
reason = processMissNotFound
252
} else if errors.Is(err, os.ErrPermission) {
253
reason = processMissPermissionDenied
254
} else {
255
reason = processMissOther
256
}
257
log.WithFields(logrus.Fields{
258
"filePath": filePath,
259
"reason": reason,
260
}).WithError(err).Debug("filesystem signature classification miss")
261
return sigNoMatch, nil
262
}
263
defer r.Close()
264
265
var serr error
266
267
src := SignatureReadCache{
268
Reader: r,
269
}
270
for _, sig := range matchingSignatures {
271
match, err := sig.Matches(&src)
272
if match {
273
return &Classification{
274
Level: sigcl.DefaultLevel,
275
Classifier: ClassifierSignature,
276
Message: fmt.Sprintf("filesystem signature matches %s", sig.Name),
277
}, nil
278
}
279
if err != nil {
280
serr = err
281
}
282
}
283
if serr != nil {
284
return nil, serr
285
}
286
287
return sigNoMatch, nil
288
}
289
290
// SignatureReadCache wraps the reader handed to the signatures of one file. A
// single instance is shared by all signatures checked against that file.
type SignatureReadCache struct {
	// Reader provides random access to the file under inspection.
	Reader io.ReaderAt

	// NOTE(review): these fields are never set in this file; presumably the
	// signature matching code fills them lazily so that each part of the file
	// is read only once - confirm against Signature.Matches.
	header  []byte
	symbols []string
	rodata  []byte
}
296
297
// Describe implements prometheus.Collector by forwarding to the process miss
// and signature hit metrics, in that order.
func (sigcl *SignatureMatchClassifier) Describe(d chan<- *prometheus.Desc) {
	for _, metric := range []prometheus.Collector{sigcl.processMissTotal, sigcl.signatureHitTotal} {
		metric.Describe(d)
	}
}
301
302
// Collect implements prometheus.Collector by forwarding to the process miss
// and signature hit metrics, in that order.
func (sigcl *SignatureMatchClassifier) Collect(m chan<- prometheus.Metric) {
	for _, metric := range []prometheus.Collector{sigcl.processMissTotal, sigcl.signatureHitTotal} {
		metric.Collect(m)
	}
}
306
307
// GetFileSignatures returns signatures that are configured for filesystem domain
308
func (sigcl *SignatureMatchClassifier) GetFileSignatures() []*Signature {
309
var filesystemSignatures []*Signature
310
for _, sig := range sigcl.Signatures {
311
if sig.Domain == DomainFileSystem {
312
filesystemSignatures = append(filesystemSignatures, sig)
313
}
314
}
315
return filesystemSignatures
316
}
317
318
// CompositeClassifier combines multiple classifiers into one. The first match wins.
319
type CompositeClassifier []ProcessClassifier
320
321
var _ ProcessClassifier = CompositeClassifier{}
322
323
var cmpNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierComposite}
324
325
func (cl CompositeClassifier) Matches(executable string, cmdline []string) (*Classification, error) {
326
var (
327
c *Classification
328
err error
329
)
330
for _, class := range cl {
331
var cerr error
332
c, cerr = class.Matches(executable, cmdline)
333
if c != nil && c.Level != LevelNoMatch {
334
// we've found a match - ignore previous errors
335
err = nil
336
break
337
}
338
if cerr != nil {
339
err = cerr
340
}
341
}
342
if err != nil {
343
return nil, err
344
}
345
346
if c == nil {
347
// empty composite classifier
348
return cmpNoMatch, nil
349
}
350
if c.Level == LevelNoMatch {
351
return cmpNoMatch, nil
352
}
353
354
res := *c
355
res.Classifier = ClassifierComposite + "." + res.Classifier
356
return &res, nil
357
}
358
359
// Describe implements prometheus.Collector by forwarding to every member
// that is a prometheus.Collector.
func (cl CompositeClassifier) Describe(d chan<- *prometheus.Desc) {
	for _, member := range cl {
		if collector, ok := member.(prometheus.Collector); ok {
			collector.Describe(d)
		}
	}
}
368
369
// Collect implements prometheus.Collector by forwarding to every member
// that is a prometheus.Collector.
func (cl CompositeClassifier) Collect(m chan<- prometheus.Metric) {
	for _, member := range cl {
		if collector, ok := member.(prometheus.Collector); ok {
			collector.Collect(m)
		}
	}
}
378
379
// GradedClassifier classifies processes based on a grading, in the order of "very", "barely", "audit"
380
type GradedClassifier map[Level]ProcessClassifier
381
382
var _ ProcessClassifier = GradedClassifier{}
383
384
var gradNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierGraded}
385
386
func (cl GradedClassifier) Matches(executable string, cmdline []string) (*Classification, error) {
387
order := []Level{LevelVery, LevelBarely, LevelAudit}
388
389
var (
390
c *Classification
391
err error
392
)
393
for _, lvl := range order {
394
class, ok := cl[lvl]
395
if !ok {
396
continue
397
}
398
399
var cerr error
400
c, cerr = class.Matches(executable, cmdline)
401
if c != nil && c.Level != LevelNoMatch {
402
// we've found a match - ignore previous errors
403
err = nil
404
break
405
}
406
if cerr != nil {
407
err = cerr
408
}
409
}
410
if err != nil {
411
return nil, err
412
}
413
414
if c == nil {
415
// empty graded classifier
416
return gradNoMatch, nil
417
}
418
if c.Level == LevelNoMatch {
419
return gradNoMatch, nil
420
}
421
422
res := *c
423
res.Classifier = ClassifierGraded + "." + res.Classifier
424
return &res, nil
425
}
426
427
// Describe implements prometheus.Collector by forwarding to every registered
// classifier that is a prometheus.Collector.
func (cl GradedClassifier) Describe(d chan<- *prometheus.Desc) {
	for _, graded := range cl {
		if collector, ok := graded.(prometheus.Collector); ok {
			collector.Describe(d)
		}
	}
}
436
437
// Collect implements prometheus.Collector by forwarding to every registered
// classifier that is a prometheus.Collector.
func (cl GradedClassifier) Collect(m chan<- prometheus.Metric) {
	for _, graded := range cl {
		if collector, ok := graded.(prometheus.Collector); ok {
			collector.Collect(m)
		}
	}
}
446
447
func NewCountingMetricsClassifier(name string, delegate ProcessClassifier) *CountingMetricsClassifier {
448
return &CountingMetricsClassifier{
449
D: delegate,
450
callCount: prometheus.NewCounter(prometheus.CounterOpts{
451
Namespace: "gitpod_agent_smith",
452
Subsystem: "classifier_count",
453
Name: "match_total",
454
Help: "total count of all Matches calls",
455
ConstLabels: prometheus.Labels{
456
"classifier_name": name,
457
},
458
}),
459
}
460
}
461
462
// CountingMetricsClassifier adds a call count metric to a classifier
463
type CountingMetricsClassifier struct {
464
D ProcessClassifier
465
466
callCount prometheus.Counter
467
}
468
469
var _ ProcessClassifier = &CountingMetricsClassifier{}
470
471
// Matches increments the call counter and then hands the process to the
// delegate, returning the delegate's result unchanged.
func (cl *CountingMetricsClassifier) Matches(executable string, cmdline []string) (*Classification, error) {
	cl.callCount.Inc()

	result, err := cl.D.Matches(executable, cmdline)
	return result, err
}
475
476
// Describe implements prometheus.Collector by forwarding to the call counter
// first and the delegate second.
func (cl *CountingMetricsClassifier) Describe(d chan<- *prometheus.Desc) {
	for _, collector := range []prometheus.Collector{cl.callCount, cl.D} {
		collector.Describe(d)
	}
}
480
481
// Collect implements prometheus.Collector by forwarding to the call counter
// first and the delegate second.
func (cl *CountingMetricsClassifier) Collect(m chan<- prometheus.Metric) {
	for _, collector := range []prometheus.Collector{cl.callCount, cl.D} {
		collector.Collect(m)
	}
}
485
486