Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
maurosoria
GitHub Repository: maurosoria/dirsearch
Path: blob/master/lib/core/options.py
896 views
1
# -*- coding: utf-8 -*-
2
# This program is free software; you can redistribute it and/or modify
3
# it under the terms of the GNU General Public License as published by
4
# the Free Software Foundation; either version 2 of the License, or
5
# (at your option) any later version.
6
#
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
# GNU General Public License for more details.
11
#
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
15
# MA 02110-1301, USA.
16
#
17
# Author: Mauro Soria
18
19
from __future__ import annotations
20
21
import os
22
import sys
23
import time
24
from optparse import Values
25
from typing import Any
26
from lib.core.settings import (
27
AUTHENTICATION_TYPES,
28
COMMON_EXTENSIONS,
29
DEFAULT_SESSION_DIR,
30
DEFAULT_TOR_PROXIES,
31
FILE_BASED_OUTPUT_FORMATS,
32
SCRIPT_PATH,
33
WORDLIST_CATEGORIES,
34
WORDLIST_CATEGORY_DIR,
35
)
36
from lib.parse.cmdline import parse_arguments
37
from lib.parse.config import ConfigParser
38
from lib.parse.headers import HeadersParser
39
from lib.utils.common import iprange, read_stdin, strip_and_uniquify
40
from lib.utils.file import File, FileUtils
41
from lib.parse.nmap import parse_nmap
42
43
44
def _session_debug(message: str) -> None:
    """Write a session diagnostic line to stderr.

    Nothing is written unless the DIRSEARCH_SESSIONS_DEBUG environment
    variable is set to a non-empty value.
    """
    if not os.environ.get("DIRSEARCH_SESSIONS_DEBUG"):
        return
    try:
        sys.stderr.write(f"[sessions] {message}\n")
        sys.stderr.flush()
    except Exception:
        # Deliberately best-effort: a failing debug write must never abort
        # option parsing.
        return


def _load_sessions(opt: Values, flag: str) -> tuple[str, list[Any]]:
    """Return ``(base_dir, sessions)`` for the session directory in use.

    ``flag`` is the command-line flag name, used only in the debug output.
    """
    # Kept as a function-scope import, as in the original code.
    from lib.controller.session import SessionStore

    base_dir = opt.sessions_dir or DEFAULT_SESSION_DIR
    _session_debug(f"{flag} enabled base_dir={base_dir!r}")
    return base_dir, SessionStore({}).list_sessions(base_dir)


def _print_sessions_and_exit(opt: Values) -> None:
    """Print every resumable session and terminate with exit status 0."""
    base_dir, sessions = _load_sessions(opt, "--list-sessions")
    _session_debug(f"--list-sessions completed total={len(sessions)}")

    if not sessions:
        print(f"No resumable sessions found in {base_dir}")
        sys.exit(0)

    print(f"Resumable sessions in {base_dir}:")
    for index, session in enumerate(sessions, 1):
        modified = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(session["modified"])
        )
        url = session["url"] or "(unknown target)"
        print(
            f"{index}. {session['path']} | {url} | "
            f"targets left: {session['targets_left']} | "
            f"dirs left: {session['directories_left']} | "
            f"jobs done: {session['jobs_processed']} | "
            f"errors: {session['errors']} | "
            f"modified: {modified}"
        )
    sys.exit(0)


def _select_session_by_id(opt: Values) -> str:
    """Translate the 1-based ``opt.session_id`` into a session file path.

    Exits with status 1 when there are no sessions, when the id is not a
    base-10 integer, or when it is outside ``1..len(sessions)``.
    """
    base_dir, sessions = _load_sessions(opt, "--session-id")
    _session_debug(f"--session-id sessions found total={len(sessions)}")
    if not sessions:
        print(f"No resumable sessions found in {base_dir}")
        sys.exit(1)

    try:
        session_index = int(str(opt.session_id), 10)
    except ValueError:
        print(f"Invalid session id: {opt.session_id}")
        sys.exit(1)
    _session_debug(f"--session-id parsed index={session_index}")

    if not 1 <= session_index <= len(sessions):
        print(f"Session id out of range: {session_index} (1-{len(sessions)})")
        sys.exit(1)

    # The ids shown by --list-sessions start at 1, the list index at 0.
    path = sessions[session_index - 1]["path"]
    _session_debug(f"--session-id resolved path={path!r}")
    return path


def _normalize_subdirs(raw: str) -> list[str]:
    """Split a comma-separated subdirectory list into normalised entries.

    Each entry gets a trailing "/" if it lacks one, the list goes through
    ``strip_and_uniquify`` and leading slashes are removed afterwards.
    """
    with_trailing_slash = [
        subdir if subdir.endswith("/") else subdir + "/"
        for subdir in raw.split(",")
    ]
    return [subdir.lstrip("/") for subdir in strip_and_uniquify(with_trailing_slash)]


def _normalize_extensions(raw: str) -> tuple[str, ...]:
    """Split a comma-separated extension list and drop leading dots."""
    return tuple(
        strip_and_uniquify([extension.lstrip(".") for extension in raw.split(",")])
    )


def parse_options() -> dict[str, Any]:
    """Parse, validate and normalise the program options.

    Command-line arguments are merged with the configuration file through
    ``merge_config``. Session-related flags are handled first:
    ``--list-sessions`` prints the sessions and exits, and a session id is
    resolved into ``opt.session_file``. When a session file is set the
    options are returned at once, without any further validation.

    Returns:
        The attribute dictionary of the options object (``vars(opt)``).

    Raises:
        SystemExit: with status 1 on any validation failure, or status 0
            after listing sessions.
    """
    opt = merge_config(parse_arguments())

    if opt.list_sessions:
        _print_sessions_and_exit(opt)

    if opt.session_id and opt.session_file:
        print("Use either --session or --session-id, not both.")
        sys.exit(1)

    if opt.session_id:
        opt.session_file = _select_session_by_id(opt)

    if opt.session_file:
        # Resuming a session: the remaining options are not processed here.
        return vars(opt)

    opt.http_method = opt.http_method.upper()

    # Target sources are mutually exclusive; the first one set wins.
    if opt.urls_file:
        fd = _access_file(opt.urls_file)
        opt.urls = fd.get_lines()
    elif opt.cidr:
        opt.urls = iprange(opt.cidr)
    elif opt.stdin_urls:
        opt.urls = read_stdin().splitlines()
    elif opt.raw_file:
        _access_file(opt.raw_file)
    elif opt.nmap_report:
        try:
            opt.urls = parse_nmap(opt.nmap_report)
        except Exception as e:
            print("Error while parsing Nmap report: " + str(e))
            sys.exit(1)
    elif not opt.urls:
        print("URL target is missing, try using -u <url>")
        sys.exit(1)

    if not opt.raw_file:
        # Entries starting with "#" are treated as comments and dropped.
        opt.urls = strip_and_uniquify(
            url for url in opt.urls if not url.startswith("#")
        )

    if not opt.extensions:
        print("WARNING: No extension was specified!")

    opt.wordlists = _resolve_wordlists(opt)

    if opt.thread_count < 1:
        print("Threads number must be greater than zero")
        sys.exit(1)

    if opt.tor:
        opt.proxies = list(DEFAULT_TOR_PROXIES)
    elif opt.proxies_file:
        fd = _access_file(opt.proxies_file)
        opt.proxies = fd.get_lines()

    if opt.data_file:
        fd = _access_file(opt.data_file)
        opt.data = fd.get_lines()

    # Certificate and key files are only checked for accessibility here.
    if opt.cert_file:
        _access_file(opt.cert_file)

    if opt.key_file:
        _access_file(opt.key_file)

    headers = {}

    if opt.headers_file:
        try:
            fd = _access_file(opt.headers_file)
            headers.update(dict(HeadersParser(fd.read())))
        except Exception as e:
            print("Error in headers file: " + str(e))
            sys.exit(1)

    if opt.headers:
        # Applied after the headers file, so these values take precedence.
        try:
            headers.update(dict(HeadersParser("\n".join(opt.headers))))
        except Exception:
            print("Invalid headers")
            sys.exit(1)

    opt.headers = headers

    # Dedicated options override whatever the generic headers provided.
    if opt.user_agent:
        opt.headers["user-agent"] = opt.user_agent

    if opt.cookie:
        opt.headers["cookie"] = opt.cookie

    opt.include_status_codes = _parse_status_codes(opt.include_status_codes)
    opt.exclude_status_codes = _parse_status_codes(opt.exclude_status_codes)
    opt.recursion_status_codes = _parse_status_codes(opt.recursion_status_codes)
    opt.skip_on_status = _parse_status_codes(opt.skip_on_status)
    opt.prefixes = tuple(strip_and_uniquify(opt.prefixes.split(",")))
    opt.suffixes = tuple(strip_and_uniquify(opt.suffixes.split(",")))
    opt.subdirs = _normalize_subdirs(opt.subdirs)
    opt.exclude_subdirs = _normalize_subdirs(opt.exclude_subdirs)
    opt.exclude_sizes = {size.strip().upper() for size in opt.exclude_sizes.split(",")}

    if opt.extensions == "*":
        opt.extensions = COMMON_EXTENSIONS
    elif opt.extensions == "CHANGELOG.md":
        print(
            "A weird extension was provided: 'CHANGELOG.md'. Please do not use * as the "
            "extension or enclose it in double quotes"
        )
        # This is an error path, so exit non-zero like every other
        # validation failure in this function.
        sys.exit(1)
    else:
        opt.extensions = _normalize_extensions(opt.extensions)

    opt.exclude_extensions = _normalize_extensions(opt.exclude_extensions)

    if opt.auth and not opt.auth_type:
        print("Please select the authentication type with --auth-type")
        sys.exit(1)
    elif opt.auth_type and not opt.auth:
        print("No authentication credential found")
        sys.exit(1)
    elif opt.auth and opt.auth_type not in AUTHENTICATION_TYPES:
        print(
            f"'{opt.auth_type}' is not in available authentication "
            f"types: {', '.join(AUTHENTICATION_TYPES)}"
        )
        sys.exit(1)

    if set(opt.extensions).intersection(opt.exclude_extensions):
        print(
            "Exclude extension list can not contain any extension "
            "that has already in the extension list"
        )
        sys.exit(1)

    # Strip before filtering so that blank entries (e.g. a trailing comma
    # followed by a space) are dropped instead of becoming an empty format.
    stripped_formats = (fmt.strip() for fmt in opt.output_formats.split(","))
    opt.output_formats = [fmt for fmt in stripped_formats if fmt]

    invalid_formats = set(opt.output_formats).difference(FILE_BASED_OUTPUT_FORMATS)
    if invalid_formats:
        # Sorted so the message is stable from run to run.
        print(f"Invalid output format(s): {', '.join(sorted(invalid_formats))}")
        sys.exit(1)

    if opt.output_file and not opt.output_formats:
        print("Please provide output formats (use '-O')")
        sys.exit(1)

    # There are multiple file-based output formats but no variable to separate
    # output files for different formats
    if (
        opt.output_file
        and "{format}" not in opt.output_file
        and len(opt.output_formats) > 1
        and (
            "{extension}" not in opt.output_file
            # "plain" and "simple" have the same file extension (txt)
            or {"plain", "simple"}.issubset(opt.output_formats)
        )
    ):
        print(
            "Found at least 2 output formats sharing the same output file, make sure "
            "you use '{format}' and '{extension}' variables in your output file"
        )
        sys.exit(1)

    # Database outputs are appended after the file-based formats have been
    # validated, because they are not file-based formats themselves.
    if opt.mysql_url:
        opt.output_formats.append("mysql")

    if opt.postgres_url:
        opt.output_formats.append("postgresql")

    if opt.log_file:
        opt.log_file = FileUtils.get_abs_path(opt.log_file)

    if opt.output_file:
        opt.output_file = FileUtils.get_abs_path(opt.output_file)

    return vars(opt)
304
305
306
def _parse_status_codes(str_: str) -> set[int]:
307
if not str_:
308
return set()
309
310
status_codes: set[int] = set()
311
312
for status_code in str_.split(","):
313
try:
314
if "-" in status_code:
315
start, end = status_code.strip().split("-")
316
status_codes.update(range(int(start), int(end) + 1))
317
else:
318
status_codes.add(int(status_code.strip()))
319
except ValueError:
320
print(f"Invalid status code or status code range: {status_code}")
321
sys.exit(1)
322
323
return status_codes
324
325
326
def _access_file(path: str) -> File:
    """Check that ``path`` can be used as an input file and return it.

    The checks run in order: the path exists, it is a valid file, and it can
    be read. The first failing check prints its message and exits the
    process with status 1. The ``File`` object is returned once the ``with``
    block has been left.
    """
    with File(path) as handle:
        checks = (
            (handle.exists, f"{path} does not exist"),
            (handle.is_valid, f"{path} is not a file"),
            (handle.can_read, f"{path} cannot be read"),
        )
        for passes, complaint in checks:
            if not passes():
                print(complaint)
                sys.exit(1)

    return handle
341
342
343
def _split_csv(value: str | None) -> list[str]:
344
if not value:
345
return []
346
return [entry.strip() for entry in value.split(",") if entry.strip()]
347
348
349
def _resolve_wordlist_categories(categories: list[str]) -> list[str]:
350
if not categories:
351
return []
352
353
normalized = [category.strip() for category in categories if category.strip()]
354
include_all = any(category.lower() in ("all", "*") for category in normalized)
355
356
if include_all:
357
return [
358
FileUtils.build_path(WORDLIST_CATEGORY_DIR, filename)
359
for filename in WORDLIST_CATEGORIES.values()
360
]
361
362
resolved = []
363
unknown = []
364
for category in normalized:
365
key = category.lower()
366
if key.endswith("*"):
367
prefix = key[:-1]
368
matches = [
369
filename
370
for name, filename in WORDLIST_CATEGORIES.items()
371
if name.startswith(prefix)
372
]
373
if matches:
374
resolved.extend(
375
FileUtils.build_path(WORDLIST_CATEGORY_DIR, filename)
376
for filename in matches
377
)
378
continue
379
380
filename = WORDLIST_CATEGORIES.get(key)
381
if filename:
382
resolved.append(FileUtils.build_path(WORDLIST_CATEGORY_DIR, filename))
383
else:
384
unknown.append(category)
385
386
if unknown:
387
print(f"Unknown wordlist categories: {', '.join(unknown)}")
388
print(
389
"Available categories: "
390
+ ", ".join(sorted(WORDLIST_CATEGORIES.keys()))
391
)
392
sys.exit(1)
393
394
return resolved
395
396
397
def _resolve_wordlists(opt: Values) -> list[str]:
    """Build the final, de-duplicated list of wordlist file paths.

    Explicit wordlists from ``opt.wordlists`` come first, followed by the
    files selected through ``opt.wordlist_categories``. If neither yields
    anything, ``db/dicc.txt`` under the script path is used. Directories are
    replaced by the files they contain, duplicates are dropped while keeping
    first-seen order, and every remaining path is checked with
    ``_access_file``.
    """
    candidates = _split_csv(opt.wordlists) + _resolve_wordlist_categories(
        _split_csv(opt.wordlist_categories)
    )
    if not candidates:
        candidates = [FileUtils.build_path(SCRIPT_PATH, "db", "dicc.txt")]

    flattened = []
    for entry in candidates:
        if FileUtils.is_dir(entry):
            flattened.extend(FileUtils.get_files(entry))
        else:
            flattened.append(entry)

    # dict keys keep insertion order, so the first occurrence wins.
    deduped = list(dict.fromkeys(flattened))

    for wordlist_path in deduped:
        _access_file(wordlist_path)

    return deduped
426
427
428
def merge_config(opt: Values) -> Values:
    """Fill unset options from the configuration file named by ``opt.config``.

    For almost every option the value already on ``opt`` is kept when it is
    truthy; otherwise the value from the configuration file is used. A falsy
    command-line value such as 0 or "" is therefore treated as unset.

    NOTE(review): the third positional argument of the ``safe_get*`` calls
    is presumably the default used when the key is absent, and the fourth
    argument on "scheme" presumably the allowed values. Confirm against
    ConfigParser.

    Returns:
        The same ``opt`` object, modified in place.
    """
    cfg = ConfigParser()
    cfg.read(opt.config)

    def fill(attr, getter, *args):
        # The getter is only called when the current value is falsy.
        setattr(opt, attr, getattr(opt, attr) or getter(*args))

    text = cfg.safe_get
    integer = cfg.safe_getint
    decimal = cfg.safe_getfloat
    flag = cfg.safe_getboolean
    items = cfg.safe_getlist

    # General
    fill("thread_count", integer, "general", "threads", 25)
    fill("async_mode", flag, "general", "async")
    fill("filter_threshold", integer, "general", "filter-threshold", 0)
    fill("include_status_codes", text, "general", "include-status")
    fill("exclude_status_codes", text, "general", "exclude-status")
    fill("exclude_sizes", text, "general", "exclude-sizes", "")
    fill("exclude_texts", items, "general", "exclude-texts")
    fill("exclude_regex", text, "general", "exclude-regex")
    fill("exclude_redirect", text, "general", "exclude-redirect")
    fill("exclude_response", text, "general", "exclude-response")
    fill("recursive", flag, "general", "recursive")
    fill("deep_recursive", flag, "general", "deep-recursive")
    fill("force_recursive", flag, "general", "force-recursive")
    fill("recursion_depth", integer, "general", "max-recursion-depth")
    fill("recursion_status_codes", text, "general", "recursion-status", "100-999")
    fill("subdirs", text, "general", "subdirs", "")
    fill("exclude_subdirs", text, "general", "exclude-subdirs", "")
    fill("skip_on_status", text, "general", "skip-on-status", "")
    fill("max_time", integer, "general", "max-time")
    fill("target_max_time", integer, "general", "target-max-time")
    fill("exit_on_error", flag, "general", "exit-on-error")

    # Dictionary
    fill("wordlists", text, "dictionary", "wordlists")
    fill("wordlist_categories", text, "dictionary", "wordlist-categories")
    fill("extensions", text, "dictionary", "default-extensions", "")
    fill("force_extensions", flag, "dictionary", "force-extensions")
    fill("overwrite_extensions", flag, "dictionary", "overwrite-extensions")
    fill("exclude_extensions", text, "dictionary", "exclude-extensions", "")
    fill("prefixes", text, "dictionary", "prefixes", "")
    fill("suffixes", text, "dictionary", "suffixes", "")
    fill("lowercase", flag, "dictionary", "lowercase")
    fill("uppercase", flag, "dictionary", "uppercase")
    fill("capital", flag, "dictionary", "capital")

    # Request
    fill("http_method", text, "request", "http-method", "get")
    fill("headers", items, "request", "headers")
    fill("headers_file", text, "request", "headers-file")
    fill("follow_redirects", flag, "request", "follow-redirects")
    fill("random_agents", flag, "request", "random-user-agents")
    fill("user_agent", text, "request", "user-agent")
    fill("cookie", text, "request", "cookie")

    # Connection
    fill("delay", decimal, "connection", "delay")
    fill("timeout", decimal, "connection", "timeout", 7.5)
    fill("max_retries", integer, "connection", "max-retries", 1)
    fill("max_rate", integer, "connection", "max-rate")
    fill("proxies", items, "connection", "proxies")
    fill("proxies_file", text, "connection", "proxies-file")
    fill("scheme", text, "connection", "scheme", None, ("http", "https"))
    fill("replay_proxy", text, "connection", "replay-proxy")
    fill("network_interface", text, "connection", "network-interface")

    # Advanced
    fill("crawl", flag, "advanced", "crawl")

    # View
    fill("full_url", flag, "view", "full-url")
    # Colour is the one inverted case: an explicit False on opt is kept and
    # any other value is replaced by the configuration setting.
    if opt.color is not False:
        opt.color = flag("view", "color", True)
    fill("quiet", flag, "view", "quiet-mode")
    fill("disable_cli", flag, "view", "disable-cli")
    fill("redirects_history", flag, "view", "show-redirects-history")

    # Output
    fill("output_file", text, "output", "output-file")
    fill("mysql_url", text, "output", "mysql-url")
    fill("postgres_url", text, "output", "postgres-url")
    # Always taken from the configuration file, whatever opt already holds.
    opt.output_table = text("output", "output-sql-table")
    fill("output_formats", text, "output", "output-formats", "plain")
    fill("log_file", text, "output", "log-file")
    # Always taken from the configuration file as well.
    opt.log_file_size = integer("output", "log-file-size")

    return opt
564
565