Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
maurosoria
GitHub Repository: maurosoria/dirsearch
Path: blob/master/lib/core/dictionary.py
896 views
1
# -*- coding: utf-8 -*-
2
# This program is free software; you can redistribute it and/or modify
3
# it under the terms of the GNU General Public License as published by
4
# the Free Software Foundation; either version 2 of the License, or
5
# (at your option) any later version.
6
#
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
# GNU General Public License for more details.
11
#
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
15
# MA 02110-1301, USA.
16
#
17
# Author: Mauro Soria
18
19
from __future__ import annotations
20
21
import re
22
from typing import Any, Iterator
23
24
from lib.core.data import options
25
from lib.core.decorators import locked
26
from lib.core.settings import (
27
SCRIPT_PATH,
28
EXTENSION_TAG,
29
EXCLUDE_OVERWRITE_EXTENSIONS,
30
EXTENSION_RECOGNITION_REGEX,
31
)
32
from lib.core.structures import OrderedSet
33
from lib.parse.url import clean_path
34
from lib.utils.common import lstrip_once
35
from lib.utils.file import FileUtils
36
37
38
# Get ignore paths for status codes.
39
# Reference: https://github.com/maurosoria/dirsearch#Blacklist
40
def get_blacklists() -> dict[int, Dictionary]:
    """
    Build the per-status-code blacklists shipped in the "db" directory.

    For each of the status codes 400, 403 and 500, the file
    "<SCRIPT_PATH>/db/<status>_blacklist.txt" is looked up. Every file that
    can be read is loaded into a Dictionary created with is_blacklist=True;
    files that cannot be read are skipped.

    Returns a mapping from status code to its blacklist Dictionary. A status
    code whose file could not be read has no entry in the mapping.
    """
    result = {}

    for code in (400, 403, 500):
        db_directory = FileUtils.build_path(SCRIPT_PATH, "db")
        blacklist_path = FileUtils.build_path(
            db_directory, f"{code}_blacklist.txt"
        )

        # Only load blacklists whose file is actually readable
        if FileUtils.can_read(blacklist_path):
            result[code] = Dictionary(
                files=[blacklist_path],
                is_blacklist=True,
            )

    return result
59
60
61
class Dictionary:
    """
    Wordlist of paths generated from one or more dictionary files.

    The generated paths are stored in self._items and consumed through
    next(); self._index is the cursor into that list. Paths added at runtime
    with add_extra() are kept separately in self._extra (with their own
    cursor, self._extra_index) and are handed out before the regular items.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Generate the wordlist; kwargs are forwarded to generate()."""
        self._index = 0
        self._items = self.generate(**kwargs)
        # Items in self._extra will be cleared when self.reset() is called
        self._extra_index = 0
        self._extra: list[str] = []

    @property
    def index(self) -> int:
        """Number of regular (non-extra) items already returned by next()."""
        return self._index

    # NOTE(review): `locked` comes from lib.core.decorators; presumably it
    # serializes concurrent calls - confirm against its definition
    @locked
    def __next__(self) -> str:
        """
        Return the next path, serving pending extra items first.

        Raises StopIteration once both the extra items and the regular
        items are exhausted.
        """
        if len(self._extra) > self._extra_index:
            self._extra_index += 1
            return self._extra[self._extra_index - 1]
        elif len(self._items) > self._index:
            self._index += 1
            return self._items[self._index - 1]
        else:
            raise StopIteration

    def __contains__(self, item: str) -> bool:
        """Tell whether item is one of the generated (non-extra) paths."""
        return item in self._items

    def __getstate__(self) -> tuple[list[str], int, list[str], int]:
        """Return (items, index, extra items, extra index)."""
        return self._items, self._index, self._extra, self._extra_index

    def __setstate__(self, state: tuple[list[str], int, list[str], int]) -> None:
        """Restore the 4-tuple produced by __getstate__()."""
        self._items, self._index, self._extra, self._extra_index = state

    def __iter__(self) -> Iterator[str]:
        """
        Iterate over the generated paths only.

        This is a fresh iterator over self._items: it does not move the
        next() cursor and does not include extra items.
        """
        return iter(self._items)

    def __len__(self) -> int:
        """Number of generated paths (extra items are not counted)."""
        return len(self._items)

    def generate(
        self, files: list[str] | None = None, is_blacklist: bool = False
    ) -> list[str]:
        """
        Dictionary.generate() behaviour

        Classic dirsearch wordlist:
          1. If %EXT% keyword is present, append one with each extension REPLACED.
          2. If the special word is not present, append line unmodified.

        Forced extensions wordlist (NEW):
          This type of wordlist processing is a mix between classic processing
          and DirBuster processing.
              1. If %EXT% keyword is present in the line, immediately process as "classic dirsearch" (1).
              2. If the line does not include the special word AND is NOT terminated by a slash,
                append one with each extension APPENDED (line.ext) and ONLY ONE with a slash.
              3. If the line does not include the special word and IS ALREADY terminated by slash,
                append line unmodified.

        Parameters:
          files: paths of the wordlist files to read; None (the default) is
            treated as "no files".
          is_blacklist: when True, only the classic processing is applied:
            no forced/overwritten extensions and no prefixes/suffixes.

        Returns the list of generated paths, without duplicates and in
        insertion order, after the optional lowercase / uppercase /
        capitalization transformation selected in `options`.
        """

        wordlist = OrderedSet()
        re_ext_tag = re.compile(EXTENSION_TAG, re.IGNORECASE)

        # `files` defaults to None rather than a shared mutable list
        for dict_file in files or ():
            for line in FileUtils.get_lines(dict_file):
                # Removing leading "/" to work with prefixes later
                line = lstrip_once(line, "/")

                if not self.is_valid(line):
                    continue

                # Classic dirsearch wordlist processing (with %EXT% keyword)
                if EXTENSION_TAG in line.lower():
                    for extension in options["extensions"]:
                        wordlist.add(re_ext_tag.sub(extension, line))
                    continue

                wordlist.add(line)

                # "Forcing extensions" and "overwriting extensions" shouldn't apply to
                # blacklists otherwise it might cause false negatives
                if is_blacklist:
                    continue

                # If "forced extensions" is used and the path is not a directory (terminated by /)
                # or has had an extension already, append extensions to the path
                if (
                    options["force_extensions"]
                    and "." not in line
                    and not line.endswith("/")
                ):
                    wordlist.add(line + "/")

                    for extension in options["extensions"]:
                        wordlist.add(f"{line}.{extension}")
                # Overwrite unknown extensions with selected ones (but also keep the origin)
                elif (
                    options["overwrite_extensions"]
                    and not line.endswith(
                        options["extensions"] + EXCLUDE_OVERWRITE_EXTENSIONS
                    )
                    # Paths that have queries in wordlist are usually used for exploiting
                    # disclosed vulnerabilities of services, skip such paths
                    and "?" not in line
                    and "#" not in line
                    and re.search(EXTENSION_RECOGNITION_REGEX, line)
                ):
                    # Strip the extension(s) from the file name only, so that dots
                    # in parent directories (e.g. "v1.0/app.jsp") are preserved
                    parent, slash, filename = line.rpartition("/")
                    base = parent + slash + filename.split(".")[0]

                    for extension in options["extensions"]:
                        wordlist.add(f"{base}.{extension}")

        if not is_blacklist:
            # Appending prefixes and suffixes
            altered_wordlist = OrderedSet()

            for path in wordlist:
                for pref in options["prefixes"]:
                    if not path.startswith(("/", pref)):
                        altered_wordlist.add(pref + path)

                for suff in options["suffixes"]:
                    if (
                        not path.endswith(("/", suff))
                        # Appending suffixes to the URL fragment is useless
                        and "?" not in path
                        and "#" not in path
                    ):
                        altered_wordlist.add(path + suff)

            # When at least one prefixed/suffixed path was produced, those
            # paths replace the original wordlist entirely
            if altered_wordlist:
                wordlist = altered_wordlist

        if options["lowercase"]:
            return list(map(str.lower, wordlist))
        elif options["uppercase"]:
            return list(map(str.upper, wordlist))
        elif options["capitalization"]:
            return list(map(str.capitalize, wordlist))
        else:
            return list(wordlist)

    def is_valid(self, path: str) -> bool:
        """
        Tell whether a wordlist line should be kept.

        Returns False for empty lines, for comments (lines starting with
        "#") and for paths whose cleaned form ends with one of the
        extensions in options["exclude_extensions"]; True otherwise.
        """
        # Skip comments and empty lines
        if not path or path.startswith("#"):
            return False

        # Skip if the path has excluded extensions
        cleaned_path = clean_path(path)
        if cleaned_path.endswith(
            tuple(f".{extension}" for extension in options["exclude_extensions"])
        ):
            return False

        return True

    def add_extra(self, path: str) -> None:
        """Queue an extra path, unless it is already a regular or extra item."""
        if path in self._items or path in self._extra:
            return

        self._extra.append(path)

    def reset(self) -> None:
        """Rewind both cursors to the start and discard all extra items."""
        self._index = self._extra_index = 0
        self._extra.clear()
221
222