GitHub Repository: sqlmapproject/sqlmap
Path: blob/master/lib/request/basic.py
#!/usr/bin/env python

"""
Copyright (c) 2006-2025 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""

import codecs
import gzip
import io
import logging
import re
import struct
import zlib

from lib.core.common import Backend
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import filterNone
from lib.core.common import getPublicTypeMembers
from lib.core.common import getSafeExString
from lib.core.common import isListLike
from lib.core.common import randomStr
from lib.core.common import readInput
from lib.core.common import resetCookieJar
from lib.core.common import singleTimeLogMessage
from lib.core.common import singleTimeWarnMessage
from lib.core.common import unArrayizeValue
from lib.core.convert import decodeHex
from lib.core.convert import getBytes
from lib.core.convert import getText
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.decorators import cachedmethod
from lib.core.decorators import lockedmethod
from lib.core.dicts import HTML_ENTITIES
from lib.core.enums import DBMS
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.settings import BLOCKED_IP_REGEX
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
from lib.core.settings import EVENTVALIDATION_REGEX
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
from lib.core.settings import IDENTYWAF_PARSE_LIMIT
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import PRINTABLE_BYTES
from lib.core.settings import SELECT_FROM_TABLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
from thirdparty import six
from thirdparty.chardet import detect
from thirdparty.identywaf import identYwaf
from thirdparty.odict import OrderedDict
from thirdparty.six import unichr as _unichr
from thirdparty.six.moves import http_client as _http_client

@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests
    """

    items = items or {}

    for _ in list(items.keys()):
        if items[_] is None:
            del items[_]

    headers = OrderedDict(conf.httpHeaders if base is None else base)
    headers.update(items.items())

    class _str(str):
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    _ = headers
    headers = OrderedDict()
    for key, value in _.items():
        success = False

        for _ in headers:
            if _.upper() == key.upper():
                del headers[_]
                break

        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError: # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                success = True
        if not success:
            key = '-'.join(_.capitalize() for _ in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                if cookie is None or cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                    continue

                if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
                    if conf.loadCookies:
                        conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
                    elif kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value, while " % HTTP_HEADER.COOKIE
                        message += "target URL provides its own cookies within "
                        message += "HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "

                        kb.mergeCookies = readInput(message, default='Y', boolean=True)

                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                        def _(value):
                            return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)

                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))

        if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
            resetCookieJar(conf.cj)

    return headers

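# Illustrative sketch (hypothetical values, not upstream code): per-request items
# are merged on top of the configured base headers, with clashing names replaced
# case-insensitively and known header names re-capitalized word-by-word, e.g.:
#
#   conf.httpHeaders = [("User-Agent", "sqlmap"), ("cookie", "a=1")]
#   forgeHeaders({HTTP_HEADER.COOKIE: "a=2"})
#   # -> OrderedDict([('User-Agent', 'sqlmap'), ('Cookie', 'a=2')])
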
def parseResponse(page, headers, status=None):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages returned
    through the web application) list and absFilePaths (absolute file
    paths) set.
    """

    if headers:
        headersParser(headers)

    if page:
        htmlParser(page if not status else "%s\n\n%s" % (status, page))

@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper names used in the codecs module

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859") # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859") # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859") # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859") # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859") # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312") # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):] # x-euc-kr -> euc-kr / x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows") # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except:
        encoding = None

    if encoding:
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding

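# A few more illustrative normalizations (hypothetical inputs, following the
# repair and translation rules above):
#
#   checkCharEncoding('x-euc-kr', False)     # -> 'euc_kr' ("x-" stripped, "euc-" -> "euc_")
#   checkCharEncoding('windows-874', False)  # -> 'iso-8859-11' (translate table)
#   checkCharEncoding('utf-8;q=0.9', False)  # -> 'utf-8' (trailing parameters dropped)
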
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected using heuristics

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    """

    key = hash(page)
    retVal = kb.cache.encoding[key] if key in kb.cache.encoding else detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
    kb.cache.encoding[key] = retVal

    if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal

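# Design note with an illustrative call (hypothetical input): detection results
# are memoized in kb.cache.encoding keyed by hash(page), so identical pages skip
# the relatively expensive chardet pass, e.g.:
#
#   getHeuristicCharEncoding(u"\u041f\u0440\u0438\u0432\u0435\u0442".encode("utf-8"))
#   # -> typically 'utf-8' (chardet's guess; short inputs may yield other charsets)
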
def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    >>> getText(decodePage(b"&#x9;", None, "text/html; charset=utf-8"))
    '\\t'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    contentEncoding = contentEncoding.lower() if hasattr(contentEncoding, "lower") else ""
    contentType = contentType.lower() if hasattr(contentType, "lower") else ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                data = io.BytesIO(zlib.decompress(page, -15)) # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
                size = struct.unpack("<l", page[-4:])[0] # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as ex:
            if b"<html" not in page: # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else b"0%s" % _.group(1)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page) # Note: %DeepSee_SQL in CACHE

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                if page and page.startswith(b"\xef\xbb\xbf"): # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            page = getUnicode(page, kb.pageEncoding)

    return page

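# Illustrative sketch (hypothetical values, Python 3): per the branch above, a
# gzip-encoded body is inflated before any charset/entity handling, e.g.:
#
#   raw = gzip.compress(b"<html>foo</html>")
#   decodePage(raw, "gzip", "text/html; charset=utf-8")  # -> u'<html>foo</html>'
#
# whereas a body that fails to inflate (and does not look like plain HTML) raises
# SqlmapCompressionException and turns off page compression for further requests.
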
def processResponse(page, responseHeaders, code=None, status=None):
    kb.processResponseCounter += 1
    page = page or ""

    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if not conf.skipWaf and kb.processResponseCounter < IDENTYWAF_PARSE_LIMIT:
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", "".join(getUnicode(responseHeaders.headers if responseHeaders else [])), page[:HEURISTIC_PAGE_SIZE_THRESHOLD])

        with kb.locks.identYwaf:
            identYwaf.non_blind.clear()
            try:
                if identYwaf.non_blind_check(rawResponse, silent=True):
                    for waf in set(identYwaf.non_blind):
                        if waf not in kb.identifiedWafs:
                            kb.identifiedWafs.add(waf)
                            errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                            singleTimeLogMessage(errMsg, logging.CRITICAL)
            except Exception as ex:
                singleTimeWarnMessage("internal error occurred in WAF/IPS detection ('%s')" % getSafeExString(ex))

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    else:
                        msg = "do you want to automatically adjust the value of '%s'? [y/N]" % name

                        if not readInput(msg, default='N', boolean=True):
                            continue

                        conf.paramDict[PLACE.POST][name] = value
                        conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True
        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"
        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        for match in re.finditer(r"(?si)<form.+?</form>", page):
            if re.search(r"(?i)captcha", match.group(0)):
                kb.captchaDetected = True
                break

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"
            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        warnMsg = "it appears that you have been blocked by the target server"
        singleTimeWarnMessage(warnMsg)
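
# Illustrative call site (hypothetical values): connection code is expected to pass
# every received response through this hook, e.g.:
#
#   processResponse(page, responseHeaders, code=200, status="OK")
#
# which in a single pass feeds header/error fingerprinting, identYwaf-based WAF/IPS
# detection, ASP.NET __VIEWSTATE/__EVENTVALIDATION value adjustment, and the browser
# verification / CAPTCHA / blocked-IP heuristics above.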