Path: blob/main/components/ee/agent-smith/pkg/classifier/classifier.go
2501 views
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34package classifier56import (7"errors"8"fmt"9"io"10"io/fs"11"os"12"regexp"13"strings"1415"github.com/gitpod-io/gitpod/agent-smith/pkg/common"16"github.com/gitpod-io/gitpod/common-go/log"17"github.com/prometheus/client_golang/prometheus"18"github.com/sirupsen/logrus"19)2021const (22ClassifierCommandline string = "commandline"23ClassifierComposite string = "composite"24ClassifierSignature string = "signature"25ClassifierGraded string = "graded"26)2728type Classification struct {29Level Level30Classifier string31Message string32}3334type Level string3536const (37LevelNoMatch Level = "no-match"38LevelBarely Level = Level(common.SeverityBarely)39LevelAudit Level = Level(common.SeverityAudit)40LevelVery Level = Level(common.SeverityVery)41)4243// ProcessClassifier matches a process against a set of criteria44type ProcessClassifier interface {45prometheus.Collector4647Matches(executable string, cmdline []string) (*Classification, error)48}4950// FileClassifier matches filesystem files against signatures51type FileClassifier interface {52MatchesFile(filePath string) (*Classification, error)53GetFileSignatures() []*Signature54}5556func NewCommandlineClassifier(name string, level Level, allowList []string, blockList []string) (*CommandlineClassifier, error) {57al := make([]*regexp.Regexp, 0, len(allowList))58for _, a := range allowList {59r, err := regexp.Compile(a)60if err != nil {61return nil, fmt.Errorf("cannot compile %s: %w", a, err)62}63al = append(al, r)64}6566return &CommandlineClassifier{67DefaultLevel: level,68AllowList: al,69BlockList: blockList,7071allowListHitTotal: prometheus.NewCounter(prometheus.CounterOpts{72Namespace: "gitpod_agent_smith",73Subsystem: "classifier_commandline",74Name: "allowlist_hit_total",75Help: "total count of allowlist hits",76ConstLabels: prometheus.Labels{77"classifier_name": name,78},79}),80blocklistHitTotal: prometheus.NewCounter(prometheus.CounterOpts{81Namespace: "gitpod_agent_smith",82Subsystem: "classifier_commandline",83Name: "blocklist_hit_total",84Help: "total count of blocklist hits",85ConstLabels: prometheus.Labels{86"classifier_name": name,87},88}),89}, nil90}9192// CommandlineClassifier looks at the commandline of a process93type CommandlineClassifier struct {94DefaultLevel Level95AllowList []*regexp.Regexp96BlockList []string9798allowListHitTotal prometheus.Counter99blocklistHitTotal prometheus.Counter100}101102var _ ProcessClassifier = &CommandlineClassifier{}103104var clNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierCommandline}105106func (cl *CommandlineClassifier) Matches(executable string, cmdline []string) (*Classification, error) {107for _, pattern := range cl.AllowList {108if pattern.MatchString(executable) || pattern.MatchString(fmt.Sprintf("%v", cmdline)) {109cl.allowListHitTotal.Inc()110return clNoMatch, nil111}112}113114for _, b := range cl.BlockList {115if strings.Contains(executable, b) || strings.Contains(strings.Join(cmdline, "|"), b) {116cl.blocklistHitTotal.Inc()117return &Classification{118Level: cl.DefaultLevel,119Classifier: ClassifierCommandline,120Message: fmt.Sprintf("matched \"%s\"", b),121}, nil122}123}124125return clNoMatch, nil126}127128func (cl *CommandlineClassifier) Describe(d chan<- *prometheus.Desc) {129cl.allowListHitTotal.Describe(d)130cl.blocklistHitTotal.Describe(d)131}132133func (cl *CommandlineClassifier) Collect(m chan<- prometheus.Metric) {134cl.allowListHitTotal.Collect(m)135cl.blocklistHitTotal.Collect(m)136}137138func NewSignatureMatchClassifier(name string, defaultLevel Level, sig []*Signature) *SignatureMatchClassifier {139return &SignatureMatchClassifier{140Signatures: sig,141DefaultLevel: defaultLevel,142processMissTotal: prometheus.NewCounterVec(prometheus.CounterOpts{143Namespace: "gitpod_agent_smith",144Subsystem: "classifier_signature",145Name: "process_miss_total",146Help: "total count of process executable misses",147ConstLabels: prometheus.Labels{148"classifier_name": name,149},150}, []string{"reason"}),151signatureHitTotal: prometheus.NewCounter(prometheus.CounterOpts{152Namespace: "gitpod_agent_smith",153Subsystem: "classifier_signature",154Name: "signature_hit_total",155Help: "total count of process executable signature hits",156ConstLabels: prometheus.Labels{157"classifier_name": name,158},159}),160}161}162163const (164// processMissNotFound is the reason we use on the process miss metric when165// either the process itself or its executable cannot be found.166processMissNotFound = "not_found"167processMissPermissionDenied = "permission_denied"168processMissOther = "other"169)170171// SignatureMatchClassifier matches against binary signatures172type SignatureMatchClassifier struct {173Signatures []*Signature174DefaultLevel Level175176processMissTotal *prometheus.CounterVec177signatureHitTotal prometheus.Counter178}179180var _ ProcessClassifier = &SignatureMatchClassifier{}181var _ FileClassifier = &SignatureMatchClassifier{}182183var sigNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierSignature}184185func (sigcl *SignatureMatchClassifier) Matches(executable string, cmdline []string) (c *Classification, err error) {186r, err := os.Open(executable)187if err != nil {188var reason string189if errors.Is(err, fs.ErrNotExist) {190reason = processMissNotFound191} else if errors.Is(err, os.ErrPermission) {192reason = processMissPermissionDenied193} else {194reason = processMissOther195}196sigcl.processMissTotal.WithLabelValues(reason).Inc()197log.WithFields(logrus.Fields{198"executable": executable,199"cmdline": cmdline,200"reason": reason,201}).WithError(err).Debug("signature classification miss")202return sigNoMatch, nil203}204defer r.Close()205206var serr error207208src := SignatureReadCache{209Reader: r,210}211for _, sig := range sigcl.Signatures {212match, err := sig.Matches(&src)213if match {214sigcl.signatureHitTotal.Inc()215return &Classification{216Level: sigcl.DefaultLevel,217Classifier: ClassifierSignature,218Message: fmt.Sprintf("matches %s", sig.Name),219}, nil220}221if err != nil {222serr = err223}224}225if serr != nil {226return nil, err227}228229return sigNoMatch, nil230}231232// MatchesFile checks if a filesystem file matches any filesystem signatures233func (sigcl *SignatureMatchClassifier) MatchesFile(filePath string) (c *Classification, err error) {234filesystemSignatures := sigcl.GetFileSignatures()235236if len(filesystemSignatures) == 0 {237return sigNoMatch, nil238}239240// Skip filename matching - the filesystem detector already filtered files241// based on signature filename patterns, so any file that reaches here242// should be checked for content matching against all filesystem signatures243matchingSignatures := filesystemSignatures244245// Open file for signature matching246r, err := os.Open(filePath)247if err != nil {248var reason string249if errors.Is(err, fs.ErrNotExist) {250reason = processMissNotFound251} else if errors.Is(err, os.ErrPermission) {252reason = processMissPermissionDenied253} else {254reason = processMissOther255}256log.WithFields(logrus.Fields{257"filePath": filePath,258"reason": reason,259}).WithError(err).Debug("filesystem signature classification miss")260return sigNoMatch, nil261}262defer r.Close()263264var serr error265266src := SignatureReadCache{267Reader: r,268}269for _, sig := range matchingSignatures {270match, err := sig.Matches(&src)271if match {272return &Classification{273Level: sigcl.DefaultLevel,274Classifier: ClassifierSignature,275Message: fmt.Sprintf("filesystem signature matches %s", sig.Name),276}, nil277}278if err != nil {279serr = err280}281}282if serr != nil {283return nil, serr284}285286return sigNoMatch, nil287}288289type SignatureReadCache struct {290Reader io.ReaderAt291header []byte292symbols []string293rodata []byte294}295296func (sigcl *SignatureMatchClassifier) Describe(d chan<- *prometheus.Desc) {297sigcl.processMissTotal.Describe(d)298sigcl.signatureHitTotal.Describe(d)299}300301func (sigcl *SignatureMatchClassifier) Collect(m chan<- prometheus.Metric) {302sigcl.processMissTotal.Collect(m)303sigcl.signatureHitTotal.Collect(m)304}305306// GetFileSignatures returns signatures that are configured for filesystem domain307func (sigcl *SignatureMatchClassifier) GetFileSignatures() []*Signature {308var filesystemSignatures []*Signature309for _, sig := range sigcl.Signatures {310if sig.Domain == DomainFileSystem {311filesystemSignatures = append(filesystemSignatures, sig)312}313}314return filesystemSignatures315}316317// CompositeClassifier combines multiple classifiers into one. The first match wins.318type CompositeClassifier []ProcessClassifier319320var _ ProcessClassifier = CompositeClassifier{}321322var cmpNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierComposite}323324func (cl CompositeClassifier) Matches(executable string, cmdline []string) (*Classification, error) {325var (326c *Classification327err error328)329for _, class := range cl {330var cerr error331c, cerr = class.Matches(executable, cmdline)332if c != nil && c.Level != LevelNoMatch {333// we've found a match - ignore previous errors334err = nil335break336}337if cerr != nil {338err = cerr339}340}341if err != nil {342return nil, err343}344345if c == nil {346// empty composite classifier347return cmpNoMatch, nil348}349if c.Level == LevelNoMatch {350return cmpNoMatch, nil351}352353res := *c354res.Classifier = ClassifierComposite + "." + res.Classifier355return &res, nil356}357358func (cl CompositeClassifier) Describe(d chan<- *prometheus.Desc) {359for _, c := range cl {360obs, ok := c.(prometheus.Collector)361if !ok {362continue363}364obs.Describe(d)365}366}367368func (cl CompositeClassifier) Collect(m chan<- prometheus.Metric) {369for _, c := range cl {370obs, ok := c.(prometheus.Collector)371if !ok {372continue373}374obs.Collect(m)375}376}377378// GradedClassifier classifies processes based on a grading, in the order of "very", "barely", "audit"379type GradedClassifier map[Level]ProcessClassifier380381var _ ProcessClassifier = GradedClassifier{}382383var gradNoMatch = &Classification{Level: LevelNoMatch, Classifier: ClassifierGraded}384385func (cl GradedClassifier) Matches(executable string, cmdline []string) (*Classification, error) {386order := []Level{LevelVery, LevelBarely, LevelAudit}387388var (389c *Classification390err error391)392for _, lvl := range order {393class, ok := cl[lvl]394if !ok {395continue396}397398var cerr error399c, cerr = class.Matches(executable, cmdline)400if c != nil && c.Level != LevelNoMatch {401// we've found a match - ignore previous errors402err = nil403break404}405if cerr != nil {406err = cerr407}408}409if err != nil {410return nil, err411}412413if c == nil {414// empty graded classifier415return gradNoMatch, nil416}417if c.Level == LevelNoMatch {418return gradNoMatch, nil419}420421res := *c422res.Classifier = ClassifierGraded + "." + res.Classifier423return &res, nil424}425426func (cl GradedClassifier) Describe(d chan<- *prometheus.Desc) {427for _, c := range cl {428obs, ok := c.(prometheus.Collector)429if !ok {430continue431}432obs.Describe(d)433}434}435436func (cl GradedClassifier) Collect(m chan<- prometheus.Metric) {437for _, c := range cl {438obs, ok := c.(prometheus.Collector)439if !ok {440continue441}442obs.Collect(m)443}444}445446func NewCountingMetricsClassifier(name string, delegate ProcessClassifier) *CountingMetricsClassifier {447return &CountingMetricsClassifier{448D: delegate,449callCount: prometheus.NewCounter(prometheus.CounterOpts{450Namespace: "gitpod_agent_smith",451Subsystem: "classifier_count",452Name: "match_total",453Help: "total count of all Matches calls",454ConstLabels: prometheus.Labels{455"classifier_name": name,456},457}),458}459}460461// CountingMetricsClassifier adds a call count metric to a classifier462type CountingMetricsClassifier struct {463D ProcessClassifier464465callCount prometheus.Counter466}467468var _ ProcessClassifier = &CountingMetricsClassifier{}469470func (cl *CountingMetricsClassifier) Matches(executable string, cmdline []string) (*Classification, error) {471cl.callCount.Inc()472return cl.D.Matches(executable, cmdline)473}474475func (cl *CountingMetricsClassifier) Describe(d chan<- *prometheus.Desc) {476cl.callCount.Describe(d)477cl.D.Describe(d)478}479480func (cl *CountingMetricsClassifier) Collect(m chan<- prometheus.Metric) {481cl.callCount.Collect(m)482cl.D.Collect(m)483}484485486