Path: blob/dev/pkg/fuzz/analyzers/xss/analyzer.go
4538 views
package xss

import (
	"io"
	"strings"

	"github.com/projectdiscovery/nuclei/v3/pkg/fuzz/analyzers"
	"golang.org/x/net/html"
)

const (
	analyzerName         = "xss_context"
	maxResponseBodyBytes = 10 * 1024 * 1024 // 10 MiB
)

// XSSAnalyzer implements the analyzers.Analyzer interface for XSS context detection.
type XSSAnalyzer struct{}

var _ analyzers.Analyzer = &XSSAnalyzer{}

func init() {
	analyzers.RegisterAnalyzer(analyzerName, &XSSAnalyzer{})
}

// Name returns the identifier under which this analyzer is registered.
func (a *XSSAnalyzer) Name() string {
	return analyzerName
}

// ApplyInitialTransformation passes data through
// analyzers.ApplyPayloadTransformations. params is not used.
func (a *XSSAnalyzer) ApplyInitialTransformation(data string, params map[string]interface{}) string {
	return analyzers.ApplyPayloadTransformations(data)
}

// Analyze sends the fuzzed request and reports where the payload is reflected.
//
// It sets the payload on the fuzzed component, rebuilds and sends the request,
// reads at most maxResponseBodyBytes of the response body, and classifies the
// reflection with AnalyzeReflectionContext. The component's original value is
// restored before returning.
//
// It returns (true, "xss-reflected in <context>", nil) when a reflection
// context is identified, (false, "", nil) when the options are incomplete, the
// payload is empty, or no context is identified, and (false, "", err) when
// setting the value, rebuilding, sending, reading or parsing fails.
func (a *XSSAnalyzer) Analyze(options *analyzers.Options) (bool, string, error) {
	if options == nil || options.FuzzGenerated.Component == nil || options.HttpClient == nil {
		return false, "", nil
	}

	gr := options.FuzzGenerated
	payload := gr.Value
	if payload == "" {
		return false, "", nil
	}

	if err := gr.Component.SetValue(gr.Key, payload); err != nil {
		return false, "", err
	}
	defer func() {
		// Best-effort restore; there is nothing useful to do if it fails.
		_ = gr.Component.SetValue(gr.Key, gr.OriginalValue)
	}()

	rebuilt, err := gr.Component.Rebuild()
	if err != nil {
		return false, "", err
	}

	resp, err := options.HttpClient.Do(rebuilt)
	if err != nil {
		return false, "", err
	}
	defer func() {
		_ = resp.Body.Close()
	}()

	// Cap the read so an oversized response cannot exhaust memory; anything
	// past the limit is simply not analyzed.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseBodyBytes))
	if err != nil {
		return false, "", err
	}

	ctx, err := AnalyzeReflectionContext(string(body), payload)
	if err != nil {
		return false, "", err
	}
	if ctx == ContextUnknown {
		return false, "", nil
	}
	return true, "xss-reflected in " + ctx.String(), nil
}

// urlAttrs lists attributes whose values may contain navigable URIs.
// ping was missed initially; it fires a POST to the URL when <a> is clicked.
var urlAttrs = map[string]struct{}{
	"href":       {},
	"src":        {},
	"action":     {},
	"formaction": {},
	"data":       {},
	"poster":     {},
	"codebase":   {},
	"cite":       {},
	"background": {},
	"manifest":   {},
	"icon":       {},
	"ping":       {},
	"longdesc":   {},
}

// eventHandlers lists attributes that execute JavaScript when triggered.
var eventHandlers = map[string]struct{}{
	"onabort":              {},
	"onafterprint":         {},
	"onbeforeprint":        {},
	"onbeforeunload":       {},
	"onblur":               {},
	"oncancel":             {},
	"oncanplay":            {},
	"oncanplaythrough":     {},
	"onchange":             {},
	"onclick":              {},
	"onclose":              {},
	"oncontextmenu":        {},
	"oncopy":               {},
	"oncuechange":          {},
	"oncut":                {},
	"ondblclick":           {},
	"ondrag":               {},
	"ondragend":            {},
	"ondragenter":          {},
	"ondragleave":          {},
	"ondragover":           {},
	"ondragstart":          {},
	"ondrop":               {},
	"ondurationchange":     {},
	"onemptied":            {},
	"onended":              {},
	"onerror":              {},
	"onfocus":              {},
	"onfocusin":            {},
	"onfocusout":           {},
	"onhashchange":         {},
	"oninput":              {},
	"oninvalid":            {},
	"onkeydown":            {},
	"onkeypress":           {},
	"onkeyup":              {},
	"onload":               {},
	"onloadeddata":         {},
	"onloadedmetadata":     {},
	"onloadstart":          {},
	"onmessage":            {},
	"onmousedown":          {},
	"onmouseenter":         {},
	"onmouseleave":         {},
	"onmousemove":          {},
	"onmouseout":           {},
	"onmouseover":          {},
	"onmouseup":            {},
	"onoffline":            {},
	"ononline":             {},
	"onpagehide":           {},
	"onpageshow":           {},
	"onpaste":              {},
	"onpause":              {},
	"onplay":               {},
	"onplaying":            {},
	"onpopstate":           {},
	"onprogress":           {},
	"onratechange":         {},
	"onreset":              {},
	"onresize":             {},
	"onscroll":             {},
	"onsearch":             {},
	"onseeked":             {},
	"onseeking":            {},
	"onselect":             {},
	"onstalled":            {},
	"onstorage":            {},
	"onsubmit":             {},
	"onsuspend":            {},
	"ontimeupdate":         {},
	"ontoggle":             {},
	"onunload":             {},
	"onvolumechange":       {},
	"onwaiting":            {},
	"onwheel":              {},
	"onanimationstart":     {},
	"onanimationend":       {},
	"onanimationiteration": {},
	"ontransitionend":      {},
	"onpointerdown":        {},
	"onpointerup":          {},
	"onpointermove":        {},
	"onpointerover":        {},
	"onpointerout":         {},
	"onpointerenter":       {},
	"onpointerleave":       {},
	"onpointercancel":      {},
	"ongotpointercapture":  {},
	"onlostpointercapture": {},
	"ontouchstart":         {},
	"ontouchend":           {},
	"ontouchmove":          {},
	"ontouchcancel":        {},
	// added after review, these are newer DOM events that were missing
	"onauxclick":                {},
	"onbeforeinput":             {},
	"onformdata":                {},
	"onslotchange":              {},
	"onsecuritypolicyviolation": {},
}

// executableScriptTypes lists MIME types that browsers actually execute.
// Empty string covers <script> with no type attribute.
var executableScriptTypes = map[string]struct{}{
	"":                         {},
	"text/javascript":          {},
	"application/javascript":   {},
	"text/ecmascript":          {},
	"application/ecmascript":   {},
	"module":                   {},
	"text/jscript":             {},
	"text/livescript":          {},
	"text/x-ecmascript":        {},
	"text/x-javascript":        {},
	"application/x-javascript": {},
	"application/x-ecmascript": {},
}

// executableURLSinks maps URL attribute names to the set of tags where
// dangerous URIs (javascript:, vbscript:, data:text/html, etc.) actually
// execute or render a document. Other tag+attr combos stay as
// ContextHTMLAttributeURL — e.g. <img src="javascript:..."> doesn't execute.
var executableURLSinks = map[string]map[string]struct{}{
	"href":       {"a": {}, "area": {}},
	"src":        {"iframe": {}, "frame": {}, "embed": {}},
	"action":     {"form": {}},
	"formaction": {"button": {}, "input": {}},
	"data":       {"object": {}},
}

// AnalyzeReflectionContext determines the HTML context where the given marker
// is reflected in the response body. It walks the body with the
// golang.org/x/net/html tokenizer and returns the context of the first token
// that contains the marker (case-insensitively).
//
// It returns ContextUnknown with a nil error when either argument is empty or
// when no single token contains the marker, and ContextUnknown with a non-nil
// error when the tokenizer stops on anything other than io.EOF.
func AnalyzeReflectionContext(responseBody, marker string) (XSSContext, error) {
	if responseBody == "" || marker == "" {
		return ContextUnknown, nil
	}

	markerLower := strings.ToLower(marker)

	// bail early if the marker isn't anywhere in the body
	if !strings.Contains(strings.ToLower(responseBody), markerLower) {
		return ContextUnknown, nil
	}

	tokenizer := html.NewTokenizer(strings.NewReader(responseBody))

	var (
		inScript     bool
		inStyle      bool
		scriptIsExec bool
	)

	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			// EOF is expected (end of doc), but surface real parse errors
			if err := tokenizer.Err(); err != nil && err != io.EOF {
				return ContextUnknown, err
			}
			return ContextUnknown, nil

		case html.CommentToken:
			if containsMarker(tokenizer.Token().Data, markerLower) {
				return ContextComment, nil
			}

		case html.StartTagToken, html.SelfClosingTagToken:
			tn, hasAttr := tokenizer.TagName()
			tagName := strings.ToLower(string(tn))

			// Important: TagAttr() is a forward-only iterator. If we checked
			// script type and marker in separate loops, the second loop would
			// see no attributes (already consumed). So we do both in one pass.
			scriptType := "" // stays empty (= executable) when there are no attrs
			if hasAttr {
				ctx, found, attrType := scanAttributes(tokenizer, markerLower, tagName)
				if found {
					return ctx, nil
				}
				scriptType = attrType
			}

			// Track script/style for self-closing tags too. A trailing "/" on
			// <script/> or <style/> does not end the element: the tokenizer
			// still emits what follows as the element's raw text, and HTML
			// parsers ignore the self-closing flag on non-void elements.
			// Limiting this to StartTagToken misreported a marker inside
			// <script/>MARKER</script> as ContextHTMLBody.
			switch tagName {
			case "script":
				inScript = true
				scriptIsExec = isScriptTypeExecutable(scriptType)
			case "style":
				inStyle = true
			}

		case html.EndTagToken:
			tn, _ := tokenizer.TagName()
			switch strings.ToLower(string(tn)) {
			case "script":
				inScript = false
			case "style":
				inStyle = false
			}

		case html.TextToken:
			if containsMarker(tokenizer.Token().Data, markerLower) {
				if inScript {
					if scriptIsExec {
						return ContextScript, nil
					}
					return ContextScriptData, nil
				}
				if inStyle {
					return ContextStyle, nil
				}
				return ContextHTMLBody, nil
			}
		}
	}
}

// scanAttributes walks all attributes of the current tag in one pass. We need
// this because the tokenizer's TagAttr() is consumable: once you iterate
// through, the attributes are gone. An earlier version had a bug where
// checking the script type first would eat all the attrs before we could check
// for the marker, so <script src="MARKER"> would silently miss the reflection.
//
// It returns the context of the first attribute whose value or name contains
// markerLower, whether such an attribute was found, and the lowercased,
// trimmed value of the first "type" attribute ("" if there is none). Callers
// must only invoke it when the tag has at least one attribute.
func scanAttributes(tokenizer *html.Tokenizer, markerLower, tagName string) (XSSContext, bool, string) {
	var markerCtx XSSContext
	markerFound := false
	scriptType := ""
	foundType := false

	for {
		key, val, more := tokenizer.TagAttr()
		attrName := strings.ToLower(string(key))
		attrValue := string(val)

		// HTML5 spec: browsers use the first type attribute when dupes exist.
		// Without this check, <script type="application/json" type="text/javascript">
		// would be classified as executable (last wins) when the browser treats it
		// as non-executable (first wins).
		if attrName == "type" && !foundType {
			scriptType = strings.ToLower(strings.TrimSpace(attrValue))
			foundType = true
		}

		// Only the first reflection is classified, but keep iterating so a
		// later type attribute is still picked up.
		if !markerFound {
			if containsMarker(attrValue, markerLower) {
				markerCtx = classifyAttributeContext(attrName, attrValue, tagName)
				markerFound = true
			} else if containsMarker(attrName, markerLower) {
				markerCtx = ContextHTMLAttribute
				markerFound = true
			}
		}

		if !more {
			break
		}
	}

	return markerCtx, markerFound, scriptType
}

// isScriptTypeExecutable returns true if the type value is something
// browsers will actually run (or empty, meaning no type was set).
// Strips MIME parameters first, browsers still execute
// "text/javascript; charset=utf-8" but the raw string wouldn't match
// the lookup table without this.
func isScriptTypeExecutable(scriptType string) bool {
	if i := strings.IndexByte(scriptType, ';'); i != -1 {
		scriptType = strings.TrimSpace(scriptType[:i])
	}
	_, isExec := executableScriptTypes[scriptType]
	return isExec
}

// classifyAttributeContext maps an attribute name to the right XSS context.
//
// Checks run in this order: event handler attributes, style, srcdoc, then URL
// attributes. attrName and tagName are expected in lower case; attrValue is
// only inspected for URL attributes. Anything else is ContextHTMLAttribute.
func classifyAttributeContext(attrName, attrValue, tagName string) XSSContext {
	if _, ok := eventHandlers[attrName]; ok {
		return ContextHTMLAttributeEvent
	}

	if attrName == "style" {
		return ContextStyle
	}

	if attrName == "srcdoc" {
		return ContextHTMLBody
	}

	if _, ok := urlAttrs[attrName]; ok {
		trimmed := strings.TrimSpace(strings.ToLower(attrValue))
		if strings.HasPrefix(trimmed, "javascript:") ||
			strings.HasPrefix(trimmed, "vbscript:") ||
			strings.HasPrefix(trimmed, "data:text/html") ||
			strings.HasPrefix(trimmed, "data:application/xhtml+xml") ||
			strings.HasPrefix(trimmed, "data:image/svg+xml") {
			// only promote to ContextScript if this tag+attr pair actually
			// executes dangerous URIs in browsers — <img src="javascript:...">
			// doesn't execute, <a href="javascript:..."> does
			if tags, ok := executableURLSinks[attrName]; ok {
				if _, ok := tags[tagName]; ok {
					return ContextScript
				}
			}
		}
		return ContextHTMLAttributeURL
	}

	return ContextHTMLAttribute
}

// containsMarker does a case-insensitive substring check.
// markerLower must already be lowercased by the caller.
func containsMarker(text, markerLower string) bool {
	return strings.Contains(strings.ToLower(text), markerLower)
}