Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sqlmapproject
GitHub Repository: sqlmapproject/sqlmap
Path: blob/master/lib/parse/html.py
2989 views
1
#!/usr/bin/env python
2
3
"""
4
Copyright (c) 2006-2025 sqlmap developers (https://sqlmap.org)
5
See the file 'LICENSE' for copying permission
6
"""
7
8
import re
9
10
from xml.sax.handler import ContentHandler
11
12
from lib.core.common import urldecode
13
from lib.core.common import parseXmlFile
14
from lib.core.data import kb
15
from lib.core.data import paths
16
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
17
from lib.core.threads import getCurrentThreadData
18
19
class HTMLHandler(ContentHandler):
    """
    This class defines methods to parse the input HTML page to
    fingerprint the back-end database management system

    It is a SAX content handler: each "dbms" element it receives sets the
    current candidate DBMS and each "error" element supplies a regular
    expression that is tried against the page passed to the constructor.
    After parsing, the recognized DBMS (or None) is available as `dbms`.
    """

    def __init__(self, page):
        ContentHandler.__init__(self)

        # DBMS named by the most recently seen "dbms" element
        self._dbms = None
        self._page = (page or "")
        try:
            self._lower_page = self._page.lower()
        except SystemError:  # https://bugs.python.org/issue18183
            # lower-casing failed; startElement() then skips the keyword
            # pre-check and relies on the regular expression alone
            self._lower_page = None
        self._urldecoded_page = urldecode(self._page)

        # final result: DBMS whose error pattern matched the page
        self.dbms = None

    def _markAsErrorPage(self):
        """
        Records the current page (together with the last request UID) as
        the last error page of the current thread
        """

        threadData = getCurrentThreadData()
        threadData.lastErrorPage = (threadData.lastRequestUID, self._page)

    def startElement(self, name, attrs):
        """
        SAX callback invoked for every opening element

        A "dbms" element stores its "value" attribute as the current
        candidate. An "error" element has its "regexp" attribute matched
        (case-insensitively) against the URL-decoded page; on a match the
        candidate becomes the result, the page is marked as an error page
        and the "fork" attribute is stored in kb.forkNote unless one is
        already set. "error" elements without a pattern are ignored.
        """

        # once a DBMS has been recognized the remaining elements are ignored
        if self.dbms:
            return

        if name == "dbms":
            self._dbms = attrs.get("value")

        elif name == "error":
            regexp = attrs.get("regexp")

            # a missing/empty pattern can't identify anything; skip it
            # instead of failing inside re.sub() or on keywords[-1]
            if not regexp:
                return

            if regexp not in kb.cache.regex:
                # longest word of the pattern (escape sequences blanked out
                # first) is cached and used as a cheap substring pre-check
                keywords = re.findall(r"\w+", re.sub(r"\\.", " ", regexp))
                keywords = sorted(keywords, key=len)
                # a pattern without word characters yields no keyword; the
                # empty string is contained in any page, so the pre-check
                # simply becomes a no-op for it
                kb.cache.regex[regexp] = keywords[-1].lower() if keywords else ""

            keyword = kb.cache.regex[regexp]

            # patterns with alternation always go to the regex engine, as do
            # all patterns when no lower-cased page is available
            prefiltered = '|' in regexp or keyword in (self._lower_page or keyword)

            if prefiltered and re.search(regexp, self._urldecoded_page, re.I):
                self.dbms = self._dbms
                self._markAsErrorPage()
                kb.forkNote = kb.forkNote or attrs.get("fork")
60
61
def htmlParser(page):
    """
    This function calls a class that parses the input HTML page to
    fingerprint the back-end database management system

    The page is truncated to HEURISTIC_PAGE_SIZE_THRESHOLD before being
    inspected and the outcome is cached in kb.cache.parsedDbms under the
    hash of the truncated page. A missing page (None) is handled like an
    empty one. Returns the recognized DBMS or None.

    >>> from lib.core.enums import DBMS
    >>> htmlParser("Warning: mysql_fetch_array() expects parameter 1 to be resource") == DBMS.MYSQL
    True
    >>> threadData = getCurrentThreadData()
    >>> threadData.lastErrorPage = None
    """

    # normalize before slicing so that None doesn't raise a TypeError
    # (HTMLHandler applies the same "page or ''" fallback itself)
    page = (page or "")[:HEURISTIC_PAGE_SIZE_THRESHOLD]

    xmlfile = paths.ERRORS_XML
    handler = HTMLHandler(page)
    key = hash(page)

    # generic SQL warning/error messages
    if re.search(r"SQL (warning|error|syntax)", page, re.I):
        handler._markAsErrorPage()

    # membership test (not .get()) because a cached None is a valid result
    if key in kb.cache.parsedDbms:
        retVal = kb.cache.parsedDbms[key]
        if retVal:
            handler._markAsErrorPage()
        return retVal

    parseXmlFile(xmlfile, handler)

    # only a DBMS not seen before is reported as the last parser status
    if handler.dbms and handler.dbms not in kb.htmlFp:
        kb.lastParserStatus = handler.dbms
        kb.htmlFp.append(handler.dbms)
    else:
        kb.lastParserStatus = None

    kb.cache.parsedDbms[key] = handler.dbms

    return handler.dbms
100
101