Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/ee/agent-smith/pkg/classifier/sinature.go
2501 views
1
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License.AGPL.txt in the project root for license information.
4
5
//go:build linux
6
// +build linux
7
8
package classifier
9
10
import (
11
"bytes"
12
"debug/elf"
13
"io"
14
"regexp"
15
16
"golang.org/x/xerrors"
17
)
18
19
// ObjectKind describes a type of object a signature can apply to
20
type ObjectKind string
21
22
const (
23
// ObjectAny applies to any object
24
ObjectAny ObjectKind = ""
25
// ObjectELFSymbols applies to ELF binaries
26
ObjectELFSymbols ObjectKind = "elf"
27
// ObjectELFRodata applies to the rodata section of ELF binaries
28
ObjectELFRodata ObjectKind = "elf-rodata"
29
)
30
31
// Domain describes where to look for the file to scan for a signature
32
type Domain string
33
34
const (
35
// DomainProcess process
36
DomainProcess Domain = "process"
37
// DomainFileSystem filesystem
38
DomainFileSystem Domain = "filesystem"
39
)
40
41
// Signature is an identifying piece of information which can be matched against a file
42
type Signature struct {
43
// Name is a description of the signature
44
Name string `json:"name,omitempty"`
45
46
// Domain describes where to look for the file to search for the signature
47
// "process" is dominant
48
// if domain is empty, we set "filesystem"
49
Domain Domain `json:"domain,omitempty"`
50
51
// The kind of file this signature can apply to
52
Kind ObjectKind `json:"kind,omitempty"`
53
54
// The pattern of the signature
55
Pattern []byte `json:"pattern"`
56
57
// If true, the pattern is expected to be a valid regular expression
58
Regexp bool `json:"regexp"`
59
60
// Checks only a specific section of the file. If the file is smaller than the end of the slice,
61
// the signature does not match.
62
Slice Slice `json:"slice,omitempty"`
63
64
// Filename is a list of filenames this signature can match to
65
Filename []string `json:"filenames,omitempty"`
66
67
// compiledRegexp is an optimization so that we don't have to re-compile the regexp every time we use it
68
compiledRegexp *regexp.Regexp
69
}
70
71
// Slice demarcates the area in a stream in which a signature ought to be tested
72
type Slice struct {
73
Start int64 `json:"start,omitempty"`
74
End int64 `json:"end,omitempty"`
75
}
76
77
// Validate ensures the signature is valid and thus a file can be matched against it
78
func (s *Signature) Validate() error {
79
if len(s.Pattern) == 0 {
80
return xerrors.Errorf("signature has no pattern")
81
}
82
if s.Regexp {
83
c, err := regexp.Compile(string(s.Pattern))
84
if err != nil {
85
return xerrors.Errorf("signature has invalid regexp pattern: %w", err)
86
}
87
s.compiledRegexp = c
88
}
89
if s.Kind == ObjectELFSymbols && (s.Slice.Start != 0 || s.Slice.End != 0) {
90
return xerrors.Errorf("cannot use slice with ELF object kind")
91
}
92
93
if s.Slice.Start < 0 || s.Slice.End < 0 {
94
return xerrors.Errorf("slice start and end must be positive")
95
}
96
if s.Slice.Start != 0 && s.Slice.End != 0 && s.Slice.End <= s.Slice.Start {
97
return xerrors.Errorf("slice start must be smaller than slice end")
98
}
99
100
if s.Domain == "" {
101
s.Domain = DomainFileSystem
102
}
103
104
return nil
105
}
106
107
// Matches checks if the signature applies to the stream
108
func (s *Signature) Matches(in *SignatureReadCache) (bool, error) {
109
if s.Slice.Start > 0 {
110
_, err := in.Reader.ReadAt([]byte{}, s.Slice.Start)
111
// slice start exceeds what we can read - this signature cannot match
112
if err != nil {
113
return false, nil
114
}
115
}
116
if s.Slice.End > 0 {
117
_, err := in.Reader.ReadAt([]byte{}, s.Slice.End)
118
// slice start exceeds what we can read - this signature cannot match
119
if err != nil {
120
return false, nil
121
}
122
}
123
124
// check the object kind
125
if s.Kind != ObjectAny {
126
var head []byte
127
if len(in.header) > 0 {
128
head = in.header
129
} else {
130
head = make([]byte, 261)
131
_, err := in.Reader.ReadAt(head, 0)
132
if err == io.EOF {
133
// cannot read header which means that only Any rules would apply
134
return false, nil
135
}
136
if err != nil {
137
return false, xerrors.Errorf("cannot read stream head: %w", err)
138
}
139
in.header = head
140
}
141
142
matches := false
143
switch s.Kind {
144
case ObjectELFSymbols, ObjectELFRodata:
145
matches = isELF(head)
146
default:
147
matches = true
148
}
149
if !matches {
150
return false, nil
151
}
152
}
153
154
// necessary to do a string match for text files
155
if s.Domain == DomainFileSystem {
156
return s.matchTextFile(in)
157
}
158
159
// match the specific kind
160
switch s.Kind {
161
case ObjectELFSymbols:
162
return s.matchELF(in)
163
case ObjectELFRodata:
164
return s.matchELFRodata(in)
165
default:
166
return s.matchAny(in)
167
}
168
}
169
170
// elfMagicNumber is the four-byte sequence an ELF file starts with: 0x7f
// followed by the ASCII letters "ELF".
var elfMagicNumber = []byte("\x7fELF")

// isELF reports whether head begins with the ELF magic number. A head shorter
// than the magic number is never considered ELF.
func isELF(head []byte) bool {
	return bytes.HasPrefix(head, elfMagicNumber)
}
186
187
// matchELF matches a signature against an ELF file
188
func (s *Signature) matchELFRodata(in *SignatureReadCache) (bool, error) {
189
var rodata []byte
190
if len(in.rodata) > 0 {
191
rodata = in.rodata
192
} else {
193
executable, err := elf.NewFile(in.Reader)
194
if err != nil {
195
return false, xerrors.Errorf("cannot anaylse ELF file: %w", err)
196
}
197
198
rodata, err = ExtractELFRodata(executable)
199
if err != nil {
200
return false, err
201
}
202
in.rodata = rodata
203
}
204
205
matches, err := s.matches(rodata)
206
if matches || err != nil {
207
return matches, err
208
}
209
210
return false, nil
211
}
212
213
// matchELF matches a signature against an ELF file
214
func (s *Signature) matchELF(in *SignatureReadCache) (bool, error) {
215
var symbols []string
216
if len(in.symbols) > 0 {
217
symbols = in.symbols
218
} else {
219
executable, err := elf.NewFile(in.Reader)
220
if err != nil {
221
return false, xerrors.Errorf("cannot anaylse ELF file: %w", err)
222
}
223
224
symbols, err = ExtractELFSymbols(executable)
225
if err != nil {
226
return false, err
227
}
228
in.symbols = symbols
229
}
230
231
for _, sym := range symbols {
232
matches, err := s.matches([]byte(sym))
233
if matches || err != nil {
234
return matches, err
235
}
236
}
237
238
return false, nil
239
}
240
241
// ExtractELFSymbols extracts all ELF symbol names from an ELF binary
242
func ExtractELFSymbols(executable *elf.File) ([]string, error) {
243
syms, err := executable.Symbols()
244
if err != nil && err != elf.ErrNoSymbols {
245
return nil, xerrors.Errorf("cannot get dynsym section: %w", err)
246
}
247
248
dynsyms, err := executable.DynamicSymbols()
249
if err != nil && err != elf.ErrNoSymbols {
250
return nil, xerrors.Errorf("cannot get dynsym section: %w", err)
251
}
252
253
symbols := make([]string, len(syms)+len(dynsyms))
254
i := 0
255
for _, s := range syms {
256
symbols[i] = s.Name
257
i += 1
258
}
259
260
for _, s := range dynsyms {
261
symbols[i] = s.Name
262
i += 1
263
}
264
265
return symbols, nil
266
}
267
268
// ExtractELFRodata extracts the .rodata section
269
func ExtractELFRodata(executable *elf.File) ([]byte, error) {
270
data := executable.Section(".rodata")
271
if data == nil {
272
// not having a .rodata section is no error in the strict sense
273
return nil, nil
274
}
275
bs, err := data.Data()
276
if err != nil {
277
return nil, xerrors.Errorf("cannot get .rodata section: %w", err)
278
}
279
return bs, nil
280
}
281
282
// matchAny matches a signature against a binary file
283
func (s *Signature) matchAny(in *SignatureReadCache) (bool, error) {
284
buffer := make([]byte, 8096)
285
pos := s.Slice.Start
286
for {
287
n, err := in.Reader.ReadAt(buffer, pos)
288
sub := buffer[0:n]
289
pos += int64(n)
290
291
// TODO: deal with buffer edges (i.e. pattern wrapping around the buffer edge)
292
if bytes.Contains(sub, s.Pattern) {
293
return true, nil
294
}
295
296
if err == io.EOF {
297
break
298
}
299
if err != nil {
300
return false, xerrors.Errorf("cannot read stream: %w", err)
301
}
302
if s.Slice.End > 0 && pos >= s.Slice.End {
303
break
304
}
305
}
306
307
return false, nil
308
}
309
310
// matchAny matches a signature against a text file
311
func (s *Signature) matchTextFile(in *SignatureReadCache) (bool, error) {
312
buffer := make([]byte, 8096)
313
pos := s.Slice.Start
314
for {
315
n, err := in.Reader.ReadAt(buffer, pos)
316
sub := buffer[0:n]
317
pos += int64(n)
318
319
match, matchErr := s.matches(sub)
320
if matchErr != nil {
321
return false, matchErr
322
}
323
if match {
324
return true, nil
325
}
326
327
if err == io.EOF {
328
break
329
}
330
if err != nil {
331
return false, xerrors.Errorf("cannot read stream: %w", err)
332
}
333
if s.Slice.End > 0 && pos >= s.Slice.End {
334
break
335
}
336
}
337
338
return false, nil
339
}
340
341
// matchesString checks if the signature matches a string (respects and caches regexp)
342
func (s *Signature) matches(v []byte) (bool, error) {
343
if s.Regexp {
344
if s.compiledRegexp == nil {
345
var err error
346
s.compiledRegexp, err = regexp.Compile(string(s.Pattern))
347
if err != nil {
348
return false, xerrors.Errorf("invalid regexp pattern: %w", err)
349
}
350
}
351
352
return s.compiledRegexp.Match(v), nil
353
}
354
355
return bytes.Contains(v, s.Pattern), nil
356
}
357
358