Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
maurosoria
GitHub Repository: maurosoria/dirsearch
Path: blob/master/lib/core/scanner.py
896 views
1
# -*- coding: utf-8 -*-
2
# This program is free software; you can redistribute it and/or modify
3
# it under the terms of the GNU General Public License as published by
4
# the Free Software Foundation; either version 2 of the License, or
5
# (at your option) any later version.
6
#
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
# GNU General Public License for more details.
11
#
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
15
# MA 02110-1301, USA.
16
#
17
# Author: Mauro Soria
18
19
from __future__ import annotations
20
21
import asyncio
22
import re
23
import time
24
from typing import Any
25
26
from lib.connection.requester import AsyncRequester, BaseRequester, Requester
27
from lib.connection.response import BaseResponse
28
from lib.core.data import options
29
from lib.core.logger import logger
30
from lib.core.settings import (
31
REFLECTED_PATH_MARKER,
32
TEST_PATH_LENGTH,
33
WILDCARD_TEST_POINT_MARKER,
34
)
35
from lib.parse.url import clean_path
36
from lib.utils.common import replace_path
37
from lib.utils.diff import DynamicContentParser, generate_matching_regex
38
from lib.utils.random import rand_string
39
40
41
class BaseScanner:
    """
    Shared wildcard-detection logic for the sync and async scanners.

    The subclasses in this module (``Scanner`` and ``AsyncScanner``) fill in
    ``self.response``, ``self.content_parser`` and, when both probe responses
    redirect, ``self.wildcard_redirect_regex`` from their ``setup()``.
    ``check()`` and ``is_wildcard()`` rely on those attributes being set.
    """

    def __init__(
        self,
        requester: BaseRequester,
        path: str = "",
        tested: dict[str, Any] | None = None,
        context: str = "all cases",
    ) -> None:
        """
        Store the scanner configuration.

        :param requester: object used by subclasses to send the probe requests
        :param path: path template used by subclasses to build the probe paths
        :param tested: mapping of category -> {key -> scanner} of scanners that
            were already set up; it is kept by reference (not copied)
        :param context: human readable label used in debug log messages
        """
        self.path = path
        # A literal `{}` default would be one dict shared by every instance
        # created without `tested`. Compare against None (not truthiness) so
        # that an empty dict passed by the caller is still kept by reference.
        self.tested = {} if tested is None else tested
        self.context = context
        self.requester = requester
        self.response = None
        self.wildcard_redirect_regex = None

    def check(self, path: str, response: BaseResponse) -> bool:
        """
        Perform analyzing to see if the response is wildcard or not

        :param path: the path that was requested
        :param response: the response received for that path
        :return: False if the response looks like the wildcard response,
            True otherwise
        """

        # A different status code can never be the wildcard response
        if self.response.status != response.status:
            return True

        # See the comment in generate_redirect_regex() to understand better
        if self.wildcard_redirect_regex and response.redirect:
            # We get rid of queries and DOM in generating redirect regex so we
            # do the same here, and we get rid of queries/DOM in path as well
            # because queries in path are usually reflected in the redirect as
            # queries too (but we have already got rid of them).
            redirect = replace_path(
                clean_path(response.redirect),
                clean_path(path),
                REFLECTED_PATH_MARKER,
            )

            # If redirection doesn't match the rule, mark as found
            if not re.match(self.wildcard_redirect_regex, redirect, re.IGNORECASE):
                logger.debug(
                    f'"{redirect}" doesn\'t match the regular expression "{self.wildcard_redirect_regex}", passing'
                )
                return True

        if self.is_wildcard(response):
            return False

        return True

    def get_duplicate(self, response: BaseResponse) -> BaseScanner | None:
        """
        Return the first already-registered scanner in ``self.tested`` whose
        stored response compares equal to ``response``, or None if there is
        no such scanner.
        """
        for category in self.tested:
            for tester in self.tested[category].values():
                if response == tester.response:
                    return tester

        return None

    def is_wildcard(self, response: BaseResponse) -> bool:
        """Check if response is similar to wildcard response"""

        # Compare 2 binary responses (Response.content is empty if the body is binary)
        if not self.response.content and not response.content:
            return self.response.body == response.body

        # Textual bodies: delegate the comparison to the content parser that
        # the subclass prepared in setup()
        return self.content_parser.compare_to(response.content)

    @staticmethod
    def generate_redirect_regex(
        first_loc: str, first_path: str, second_loc: str, second_path: str
    ) -> str:
        """
        From 2 redirects of wildcard responses, generate a regexp that matches
        every wildcard redirect.

        How it works:
        1. Replace path in 2 redirect URLs (if it gets reflected in) with a mark
           (e.g. /path1 -> /foo/path1 and /path2 -> /foo/path2 will become /foo[mark] for both)
        2. Compare 2 redirects and generate a regex that matches both
           (e.g. /foo[mark] and /foo[mark] will have the regex: ^/foo[mark]$)
        3. To check if a redirect is wildcard, replace path with the mark and check if it matches this regex
           (e.g. /path3 -> /bar/path3, the redirect becomes /bar[mark], which doesn't match the regex ^/foo[mark]$)
        """

        # An empty path would turn every "/" into the marker, so skip it
        if first_path:
            first_loc = first_loc.replace("/" + first_path, REFLECTED_PATH_MARKER)
        if second_path:
            second_loc = second_loc.replace("/" + second_path, REFLECTED_PATH_MARKER)

        return generate_matching_regex(first_loc, second_loc)
127
128
129
class Scanner(BaseScanner):
    """Blocking scanner: the wildcard probes are sent while constructing it."""

    def __init__(
        self,
        requester: Requester,
        *,
        path: str = "",
        tested: dict[str, dict[str, Scanner]] | None = None,
        context: str = "all cases",
    ) -> None:
        """
        Store the configuration and immediately run ``setup()``.

        :param requester: requester used to send the probe requests
        :param path: path template; WILDCARD_TEST_POINT_MARKER in it is
            replaced by a random string for each probe
        :param tested: mapping of category -> {key -> scanner} of scanners
            that were already set up; kept by reference
        :param context: human readable label used in debug log messages
        """
        # Avoid a shared `{}` default; normalise here so the base class always
        # receives a real dict. `is None` keeps a caller-supplied empty dict.
        super().__init__(requester, path, {} if tested is None else tested, context)
        self.setup()

    def setup(self) -> None:
        """
        Generate wildcard response information containers, this will be
        used to compare with other path responses
        """

        first_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH),
        )
        first_response = self.requester.request(first_path)
        self.response = first_response
        time.sleep(options["delay"])

        # Another test was performed before and has the same response as this
        if duplicate := self.get_duplicate(first_response):
            # Reuse its comparison data instead of sending a second probe
            self.content_parser = duplicate.content_parser
            self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex
            logger.debug(f'Skipped the second test for "{self.context}"')
            return

        second_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH, omit=first_path),
        )
        second_response = self.requester.request(second_path)
        time.sleep(options["delay"])

        if first_response.redirect and second_response.redirect:
            # Removing the queries (and DOM) with clean_path() because sometimes
            # some queries that are assigned random values that are hard to deal with
            self.wildcard_redirect_regex = self.generate_redirect_regex(
                clean_path(first_response.redirect),
                first_path,
                clean_path(second_response.redirect),
                second_path,
            )
            logger.debug(
                f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}'
            )

        # Built from both probe bodies; used later by is_wildcard()
        self.content_parser = DynamicContentParser(
            first_response.content, second_response.content
        )
185
186
187
class AsyncScanner(BaseScanner):
    """
    Coroutine-based scanner.

    ``__init__`` only stores the configuration; use ``await
    AsyncScanner.create(...)`` to get an instance whose ``setup()`` has run.
    """

    def __init__(
        self,
        requester: AsyncRequester,
        *,
        path: str = "",
        tested: dict[str, dict[str, AsyncScanner]] | None = None,
        context: str = "all cases",
    ) -> None:
        """
        Store the configuration without sending any request.

        :param requester: requester whose ``request()`` is awaited by setup()
        :param path: path template; WILDCARD_TEST_POINT_MARKER in it is
            replaced by a random string for each probe
        :param tested: mapping of category -> {key -> scanner} of scanners
            that were already set up; kept by reference
        :param context: human readable label used in debug log messages
        """
        # Avoid a shared `{}` default; normalise here so the base class always
        # receives a real dict. `is None` keeps a caller-supplied empty dict.
        super().__init__(requester, path, {} if tested is None else tested, context)

    @classmethod
    async def create(
        cls,
        requester: AsyncRequester,
        *,
        path: str = "",
        tested: dict[str, dict[str, AsyncScanner]] | None = None,
        context: str = "all cases",
    ) -> AsyncScanner:
        """
        Build an instance and await its ``setup()`` before returning it.

        Takes the same arguments as ``__init__``.
        """
        # `tested` may be None here; __init__ turns it into a fresh dict
        self = cls(requester, path=path, tested=tested, context=context)
        await self.setup()
        return self

    async def setup(self) -> None:
        """
        Generate wildcard response information containers, this will be
        used to compare with other path responses
        """

        first_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH),
        )
        first_response = await self.requester.request(first_path)
        self.response = first_response
        await asyncio.sleep(options["delay"])

        # Another test was performed before and has the same response as this
        if duplicate := self.get_duplicate(first_response):
            # Reuse its comparison data instead of sending a second probe
            self.content_parser = duplicate.content_parser
            self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex
            logger.debug(f'Skipped the second test for "{self.context}"')
            return

        second_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH, omit=first_path),
        )
        second_response = await self.requester.request(second_path)
        await asyncio.sleep(options["delay"])

        if first_response.redirect and second_response.redirect:
            # Removing the queries (and DOM) with clean_path() because sometimes
            # some queries that are assigned random values that are hard to deal with
            self.wildcard_redirect_regex = self.generate_redirect_regex(
                clean_path(first_response.redirect),
                first_path,
                clean_path(second_response.redirect),
                second_path,
            )
            logger.debug(
                f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}'
            )

        # Built from both probe bodies; used later by is_wildcard()
        self.content_parser = DynamicContentParser(
            first_response.content, second_response.content
        )
254
255