GitHub Repository: sqlmapproject/sqlmap
Path: blob/master/lib/request/basic.py
#!/usr/bin/env python

"""
Copyright (c) 2006-2025 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""

import codecs
import gzip
import io
import logging
import re
import struct
import zlib

from lib.core.common import Backend
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import filterNone
from lib.core.common import getPublicTypeMembers
from lib.core.common import getSafeExString
from lib.core.common import isListLike
from lib.core.common import randomStr
from lib.core.common import readInput
from lib.core.common import resetCookieJar
from lib.core.common import singleTimeLogMessage
from lib.core.common import singleTimeWarnMessage
from lib.core.common import unArrayizeValue
from lib.core.convert import decodeHex
from lib.core.convert import getBytes
from lib.core.convert import getText
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.decorators import cachedmethod
from lib.core.decorators import lockedmethod
from lib.core.dicts import HTML_ENTITIES
from lib.core.enums import DBMS
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.settings import BLOCKED_IP_REGEX
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
from lib.core.settings import EVENTVALIDATION_REGEX
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
from lib.core.settings import IDENTYWAF_PARSE_LIMIT
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import PRINTABLE_BYTES
from lib.core.settings import SELECT_FROM_TABLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
from thirdparty import six
from thirdparty.chardet import detect
from thirdparty.identywaf import identYwaf
from thirdparty.odict import OrderedDict
from thirdparty.six import unichr as _unichr
from thirdparty.six.moves import http_client as _http_client

@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests
    """

    items = items or {}

    for _ in list(items.keys()):
        if items[_] is None:
            del items[_]

    headers = OrderedDict(conf.httpHeaders if base is None else base)
    headers.update(items.items())

    class _str(str):
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    _ = headers
    headers = OrderedDict()
    for key, value in _.items():
        success = False

        for _ in headers:
            if _.upper() == key.upper():
                del headers[_]
                break

        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError: # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                success = True
        if not success:
            key = '-'.join(_.capitalize() for _ in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                if cookie is None or cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                    continue

                if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
                    if conf.loadCookies:
                        conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
                    elif kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value, while " % HTTP_HEADER.COOKIE
                        message += "target URL provides its own cookies within "
                        message += "HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "

                        kb.mergeCookies = readInput(message, default='Y', boolean=True)

                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                        def _(value):
                            return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)

                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))

        if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
            resetCookieJar(conf.cj)

    return headers

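# Illustrative sketch (hypothetical values, not upstream code): per-request items
# are merged on top of the configured base headers, with clashing names replaced
# case-insensitively and known header names re-capitalized word-by-word, e.g.:
#
#   conf.httpHeaders = [("User-Agent", "sqlmap"), ("cookie", "a=1")]
#   forgeHeaders({HTTP_HEADER.COOKIE: "a=2"})
#   # -> OrderedDict([('User-Agent', 'sqlmap'), ('Cookie', 'a=2')])
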
def parseResponse(page, headers, status=None):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages returned
    through the web application) list and absFilePaths (absolute file
    paths) set.
    """

    if headers:
        headersParser(headers)

    if page:
        htmlParser(page if not status else "%s\n\n%s" % (status, page))

@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper names used in the codecs module

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859") # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859") # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859") # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859") # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859") # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312") # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):] # x-euc-kr -> euc-kr / x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows") # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except:
        encoding = None

    if encoding:
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding

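# A few more illustrative normalizations (hypothetical inputs, following the
# repair and translation rules above):
#
#   checkCharEncoding('x-euc-kr', False)     # -> 'euc_kr' ("x-" stripped, "euc-" -> "euc_")
#   checkCharEncoding('windows-874', False)  # -> 'iso-8859-11' (translate table)
#   checkCharEncoding('utf-8;q=0.9', False)  # -> 'utf-8' (trailing parameters dropped)
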
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected using heuristics

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    """

    key = hash(page)
    retVal = kb.cache.encoding[key] if key in kb.cache.encoding else detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
    kb.cache.encoding[key] = retVal

    if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal

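# Design note with an illustrative call (hypothetical input): detection results
# are memoized in kb.cache.encoding keyed by hash(page), so identical pages skip
# the relatively expensive chardet pass, e.g.:
#
#   getHeuristicCharEncoding(u"\u041f\u0440\u0438\u0432\u0435\u0442".encode("utf-8"))
#   # -> typically 'utf-8' (chardet's guess; short inputs may yield other charsets)
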
def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    >>> getText(decodePage(b"&#x9;", None, "text/html; charset=utf-8"))
    '\\t'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    contentEncoding = contentEncoding.lower() if hasattr(contentEncoding, "lower") else ""
    contentType = contentType.lower() if hasattr(contentType, "lower") else ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                data = io.BytesIO(zlib.decompress(page, -15)) # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
                size = struct.unpack("<l", page[-4:])[0] # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as ex:
            if b"<html" not in page: # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else b"0%s" % _.group(1)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page) # Note: %DeepSee_SQL in CACHE

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                if page and page.startswith(b"\xef\xbb\xbf"): # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            page = getUnicode(page, kb.pageEncoding)

    return page

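# Illustrative sketch (hypothetical values, Python 3): per the branch above, a
# gzip-encoded body is inflated before any charset/entity handling, e.g.:
#
#   raw = gzip.compress(b"<html>foo</html>")
#   decodePage(raw, "gzip", "text/html; charset=utf-8")  # -> u'<html>foo</html>'
#
# whereas a body that fails to inflate (and does not look like plain HTML) raises
# SqlmapCompressionException and turns off page compression for further requests.
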
def processResponse(page, responseHeaders, code=None, status=None):
    kb.processResponseCounter += 1
    page = page or ""

    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if not conf.skipWaf and kb.processResponseCounter < IDENTYWAF_PARSE_LIMIT:
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", "".join(getUnicode(responseHeaders.headers if responseHeaders else [])), page[:HEURISTIC_PAGE_SIZE_THRESHOLD])

        with kb.locks.identYwaf:
            identYwaf.non_blind.clear()
            try:
                if identYwaf.non_blind_check(rawResponse, silent=True):
                    for waf in set(identYwaf.non_blind):
                        if waf not in kb.identifiedWafs:
                            kb.identifiedWafs.add(waf)
                            errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                            singleTimeLogMessage(errMsg, logging.CRITICAL)
            except Exception as ex:
                singleTimeWarnMessage("internal error occurred in WAF/IPS detection ('%s')" % getSafeExString(ex))

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    else:
                        msg = "do you want to automatically adjust the value of '%s'? [y/N]" % name

                        if not readInput(msg, default='N', boolean=True):
                            continue

                        conf.paramDict[PLACE.POST][name] = value
                        conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True
        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"
        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        for match in re.finditer(r"(?si)<form.+?</form>", page):
            if re.search(r"(?i)captcha", match.group(0)):
                kb.captchaDetected = True
                break

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"
            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        warnMsg = "it appears that you have been blocked by the target server"
        singleTimeWarnMessage(warnMsg)
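
# Illustrative call site (hypothetical values): connection code is expected to pass
# every received response through this hook, e.g.:
#
#   processResponse(page, responseHeaders, code=200, status="OK")
#
# which in a single pass feeds header/error fingerprinting, identYwaf-based WAF/IPS
# detection, ASP.NET __VIEWSTATE/__EVENTVALIDATION value adjustment, and the browser
# verification / CAPTCHA / blocked-IP heuristics above.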