#!/usr/bin/env python

"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""

import codecs
import gzip
import io
import logging
import re
import zlib

from lib.core.common import Backend
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import filterNone
from lib.core.common import getPublicTypeMembers
from lib.core.common import getSafeExString
from lib.core.common import isListLike
from lib.core.common import randomStr
from lib.core.common import readInput
from lib.core.common import resetCookieJar
from lib.core.common import singleTimeLogMessage
from lib.core.common import singleTimeWarnMessage
from lib.core.common import unArrayizeValue
from lib.core.convert import decodeHex
from lib.core.convert import getBytes
from lib.core.convert import getText
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.decorators import cachedmethod
from lib.core.decorators import lockedmethod
from lib.core.dicts import HTML_ENTITIES
from lib.core.enums import DBMS
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.settings import BLOCKED_IP_REGEX
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
from lib.core.settings import EVENTVALIDATION_REGEX
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
from lib.core.settings import IDENTYWAF_PARSE_COUNT_LIMIT
from lib.core.settings import IDENTYWAF_PARSE_PAGE_LIMIT
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import PRINTABLE_BYTES
from lib.core.settings import SELECT_FROM_TABLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
from thirdparty import six
from thirdparty.chardet import detect
from thirdparty.identywaf import identYwaf
from thirdparty.odict import OrderedDict
from thirdparty.six import unichr as _unichr
from thirdparty.six.moves import http_client as _http_client

@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests
    """

    items = items or {}

    for _ in list(items.keys()):
        if items[_] is None:
            del items[_]

    headers = OrderedDict(conf.httpHeaders if base is None else base)
    headers.update(items.items())

    class _str(str):
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    _ = headers
    headers = OrderedDict()
    for key, value in _.items():
        success = False

        for _ in headers:
            if _.upper() == key.upper():
                del headers[_]
                break

        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError:  # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                success = True

        if not success:
            key = '-'.join(_.capitalize() for _ in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                if cookie is None or cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                    continue

                if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
                    if conf.loadCookies:
                        conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
                    elif kb.mergeCookies is None:
                        message = "you provided an HTTP %s header value, while " % HTTP_HEADER.COOKIE
                        message += "the target URL provides its own cookies within "
                        message += "the HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "

                        kb.mergeCookies = readInput(message, default='Y', boolean=True)

                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                        def _(value):
                            return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)

                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))

        if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
            resetCookieJar(conf.cj)

    return headers

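# Illustration (hypothetical header name/value; forgeHeaders() is normally driven
# by the request machinery in lib/request/connect.py):
#
#     headers = forgeHeaders({"X-CSRF-ToKeN": "deadbeef"})
#     # -> OrderedDict of conf.httpHeaders overlaid with "X-CSRF-ToKeN" kept in its
#     #    exact casing (the _str() no-op capitalize()/title() above works around
#     #    http://bugs.python.org/issue12455, where urllib's handlers re-capitalize
#     #    header names), with cookies from conf.cj merged per the logic above
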
def parseResponse(page, headers, status=None):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages returned
    through the web application) list and absFilePaths (absolute file
    paths) set.
    """

    if headers:
        headersParser(headers)

    if page:
        htmlParser(page if not status else "%s\n\n%s" % (status, page))

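# Note: when a status line is given, it is prepended to the page before HTML
# parsing, so DBMS error signatures that only appear in the status text are
# fingerprinted as well.
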
@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    the proper names used in the codecs module

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859")  # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]  # x-euc-kr -> euc-kr / x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except:
        encoding = None

    if encoding:
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding

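# A few further normalizations implied by the rules above (hypothetical inputs):
#
#     checkCharEncoding("x-euc-kr", False)     # -> 'euc_kr' ("x-" stripped, "euc-" -> "euc_")
#     checkCharEncoding("Windows-31J", False)  # -> 'cp932' (via the translate table)
#     checkCharEncoding("{charset}", False)    # -> None (unfilled template placeholder)
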
@lockedmethod
def getHeuristicCharEncoding(page):
    """
    Returns the web page charset detected by heuristics

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    """

    key = (len(page), hash(page))

    retVal = kb.cache.encoding.get(key)
    if retVal is None:
        retVal = detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
        kb.cache.encoding[key] = retVal

    if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal

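# Note: only the first HEURISTIC_PAGE_SIZE_THRESHOLD bytes are fed to chardet, and
# results are memoized in kb.cache.encoding keyed by (len(page), hash(page)), so
# repeated identical pages skip the detection cost.
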
def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    >>> getText(decodePage(b"&#x9;", None, "text/html; charset=utf-8"))
    '\\t'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    contentEncoding = getText(contentEncoding).lower() if contentEncoding else ""
    contentType = getText(contentType).lower() if contentType else ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                obj = zlib.decompressobj(-15)
                page = obj.decompress(page, MAX_CONNECTION_TOTAL_SIZE + 1)
                page += obj.flush()
                if len(page) > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
                page = data.read(MAX_CONNECTION_TOTAL_SIZE + 1)
                if len(page) > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")
        except Exception as ex:
            if b"<html" not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else b"0%s" % _.group(1)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page)  # Note: %DeepSee_SQL in CACHE

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                if page and page.startswith(b"\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            page = getUnicode(page, kb.pageEncoding)

    return page

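# Illustration of the decompression branch (a sketch, assuming conf/kb have been
# initialized by sqlmap's startup code, conf.encoding is unset and kb.pageCompress
# is enabled):
#
#     import gzip as _gz
#     raw = _gz.compress(b"<html>ok</html>")
#     decodePage(raw, "gzip", "text/html; charset=utf-8")  # -> '<html>ok</html>'
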
def processResponse(page, responseHeaders, code=None, status=None):
    """
    Process a (decoded) HTTP response: fingerprint parsing, WAF/IPS
    identification and detection of anti-automation mechanisms
    """

    kb.processResponseCounter += 1
    page = page or ""

    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if not conf.skipWaf and kb.processResponseCounter < IDENTYWAF_PARSE_COUNT_LIMIT:
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", "".join(getUnicode(responseHeaders.headers if responseHeaders else [])), page[:IDENTYWAF_PARSE_PAGE_LIMIT] if not kb.checkWafMode else page[:HEURISTIC_PAGE_SIZE_THRESHOLD])

        with kb.locks.identYwaf:
            identYwaf.non_blind.clear()
            try:
                if identYwaf.non_blind_check(rawResponse, silent=True):
                    for waf in set(identYwaf.non_blind):
                        if waf not in kb.identifiedWafs:
                            kb.identifiedWafs.add(waf)
                            errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                            singleTimeLogMessage(errMsg, logging.CRITICAL)
            except Exception as ex:
                singleTimeWarnMessage("internal error occurred in WAF/IPS detection ('%s')" % getSafeExString(ex))

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    else:
                        msg = "do you want to automatically adjust the value of '%s'? [y/N] " % name

                        if not readInput(msg, default='N', boolean=True):
                            continue

                        conf.paramDict[PLACE.POST][name] = value
                        conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True
        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"
        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        for match in re.finditer(r"(?si)<form.+?</form>", page):
            if re.search(r"(?i)captcha", match.group(0)):
                kb.captchaDetected = True
                break

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"
            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        warnMsg = "it appears that you have been blocked by the target server"
        singleTimeWarnMessage(warnMsg)
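
# Rough per-response call flow (a sketch; in sqlmap both calls are driven from
# lib/request/connect.py):
#
#     page = decodePage(rawBody, contentEncoding, contentType)
#     processResponse(page, responseHeaders, code=code, status=status)
#
# i.e. transport/charset normalization first, then fingerprinting, WAF/IPS
# identification and anti-automation (browser verification/CAPTCHA/block) checks.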