Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
maurosoria
GitHub Repository: maurosoria/dirsearch
Path: blob/master/lib/controller/controller.py
896 views
1
# -*- coding: utf-8 -*-
2
# This program is free software; you can redistribute it and/or modify
3
# it under the terms of the GNU General Public License as published by
4
# the Free Software Foundation; either version 2 of the License, or
5
# (at your option) any later version.
6
#
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
# GNU General Public License for more details.
11
#
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
15
# MA 02110-1301, USA.
16
#
17
# Author: Mauro Soria
18
19
from __future__ import annotations
20
21
import asyncio
22
import gc
23
import os
24
import sys
25
import shutil
26
import signal
27
import sys
28
import psycopg
29
import re
30
import time
31
import mysql.connector
32
from typing import Any
33
34
from urllib.parse import urlparse
35
36
from lib.connection.dns import cache_dns
37
from lib.connection.response import BaseResponse
38
from lib.core.data import blacklists, options
39
from lib.core.decorators import locked
40
from lib.core.dictionary import Dictionary, get_blacklists
41
from lib.core.exceptions import (
42
CannotConnectException,
43
FileExistsException,
44
InvalidRawRequest,
45
InvalidURLException,
46
RequestException,
47
SkipTargetInterrupt,
48
QuitInterrupt,
49
UnpicklingError,
50
)
51
from lib.core.logger import enable_logging, logger
52
from lib.core.settings import (
53
BANNER,
54
DEFAULT_HEADERS,
55
DEFAULT_SESSION_FILE,
56
EXTENSION_RECOGNITION_REGEX,
57
MAX_CONSECUTIVE_REQUEST_ERRORS,
58
NEW_LINE,
59
SIGINT_FORCE_QUIT_THRESHOLD,
60
SIGINT_WINDOW_SECONDS,
61
STANDARD_PORTS,
62
START_TIME,
63
UNKNOWN,
64
)
65
from lib.parse.rawrequest import parse_raw
66
from lib.parse.url import clean_path, parse_path
67
from lib.report.manager import ReportManager
68
from lib.utils.common import lstrip_once
69
from lib.utils.crawl import Crawler
70
from lib.utils.file import FileUtils
71
from lib.utils.schemedet import detect_scheme
72
from lib.view.terminal import interface
73
from lib.controller.session import SessionStore
74
75
76
class ForceQuitHandler:
    """Strategy interface for force-quit behavior on repeated Ctrl+C.

    Signal delivery differs between platforms, so concrete subclasses
    provide the platform-specific logic; this base class only declares
    the hooks.
    """

    def check_force_quit(self) -> bool:
        """Decide whether force quit triggers; True means the process exits."""
        raise NotImplementedError

    def on_pause_start(self) -> None:
        """Hook invoked when pause mode is entered."""
        pass

    def on_resume(self) -> None:
        """Hook invoked when execution resumes from pause."""
        pass
100
class StandardForceQuitHandler(ForceQuitHandler):
    """Force quit handler for regular (non-PyInstaller-Linux) platforms.

    Any Ctrl+C received while already paused terminates the process
    immediately.
    """

    def check_force_quit(self) -> bool:
        interface.warning("\nForce quit!", do_save=False)
        os._exit(1)
        # Never reached; present only to satisfy the declared return type.
        return True
112
class PyInstallerLinuxForceQuitHandler(ForceQuitHandler):
    """Force quit handler for PyInstaller builds on Linux.

    Signal handling under PyInstaller on Linux is quirky, so force quit
    requires several rapid Ctrl+C presses inside a short window, and the
    process is terminated with SIGKILL for reliability.
    """

    def __init__(self) -> None:
        self._sigint_count = 0
        self._last_sigint_time = 0.0

    def check_force_quit(self) -> bool:
        now = time.monotonic()
        # Count presses only while they arrive within the window;
        # otherwise start a fresh streak.
        if now - self._last_sigint_time <= SIGINT_WINDOW_SECONDS:
            self._sigint_count += 1
        else:
            self._sigint_count = 1
        self._last_sigint_time = now

        if self._sigint_count < SIGINT_FORCE_QUIT_THRESHOLD:
            return False

        interface.warning("\nForce quit!", do_save=False)
        os.kill(os.getpid(), signal.SIGKILL)
        # Fallback in case SIGKILL delivery is somehow delayed.
        os._exit(1)

    def on_pause_start(self) -> None:
        # The Ctrl+C that opened the pause menu counts as the first press.
        self._sigint_count = 1
        self._last_sigint_time = time.monotonic()

    def on_resume(self) -> None:
        self._sigint_count = 0
145
def _create_force_quit_handler() -> ForceQuitHandler:
    """Pick the force-quit strategy matching the current runtime."""
    frozen = bool(getattr(sys, "frozen", False))
    if frozen and sys.platform.startswith("linux"):
        # PyInstaller bundles set sys.frozen; Linux builds need the
        # multi-press SIGKILL strategy.
        return PyInstallerLinuxForceQuitHandler()
    return StandardForceQuitHandler()
155
def format_session_path(path: str) -> str:
    """Expand the "{date}" and "{datetime}" placeholders in *path*."""
    date_token = START_TIME.split()[0]
    # ":" is illegal in Windows file/folder names, so the time part uses "-".
    datetime_token = START_TIME.replace(" ", "_").replace(":", "-")
    return path.replace("{date}", date_token).replace("{datetime}", datetime_token)
163
class Controller:
164
    def __init__(self) -> None:
        """Build the controller, restore or set up state, and start scanning."""
        # True while the pause menu is open; a second SIGINT then goes to
        # the force-quit handler instead of re-entering the menu.
        self._handling_pause = False
        self._force_quit_handler = _create_force_quit_handler()
        self.loop = None  # Will be set if async mode is used

        if options["session_file"]:
            self._import(options["session_file"])
            # old_session may already have been restored from the session
            # payload by apply_to_controller — TODO confirm.
            if not hasattr(self, "old_session"):
                self.old_session = True
        else:
            self.setup()
            self.old_session = False

        self.run()
    def _import(self, session_file: str) -> None:
        """Restore a previous scan from *session_file*.

        Rejects legacy pickle sessions, restores saved options and
        controller state, re-enables logging when configured, and replays
        the saved output so the user sees where the previous run stopped.
        Exits the process on any invalid or corrupt session file.
        """
        try:
            if os.path.isfile(session_file) and session_file.endswith((".pickle", ".pkl")):
                interface.warning(
                    "Pickle session files are no longer supported. "
                    "Please start a new scan to create a JSON session."
                )
                sys.exit(1)
            session_store = SessionStore(options)
            payload = session_store.load(session_file)
            # Keep the explicit session path so resume/overwrite works as expected.
            loaded_session_file = session_file
            options.update(session_store.restore_options(payload["options"]))
            options["session_file"] = loaded_session_file
            if options["log_file"]:
                try:
                    FileUtils.create_dir(FileUtils.parent(options["log_file"]))
                    if not FileUtils.can_write(options["log_file"]):
                        raise Exception
                    enable_logging()
                except Exception:
                    interface.error(
                        f'Couldn\'t create log file at {options["log_file"]}'
                    )
                    sys.exit(1)
            # Prefer the multi-run "output_history"; fall back to the legacy
            # single "last_output" field of older session formats.
            output_history = payload.get("output_history") or []
            if not output_history:
                legacy_output = payload.get("last_output", "")
                if legacy_output:
                    start_time = payload.get("controller", {}).get("start_time")
                    output_history = [
                        {"start_time": start_time, "output": legacy_output}
                    ]
            self.output_history = output_history
            if output_history:
                last_output = self._format_output_history(output_history)
            else:
                last_output = ""
            session_store.apply_to_controller(self, payload)
            self._confirm_session_overwrite(session_file)
        except (OSError, KeyError, TypeError, UnpicklingError):
            interface.error(
                f"{session_file} is not a valid session file or it's in an old format"
            )
            sys.exit(1)
        # Replay the previous run's output (exception paths exit above,
        # so last_output is always bound here).
        print(last_output)
def _format_output_history(self, output_history: list[dict[str, Any]]) -> str:
227
formatted: list[str] = []
228
for entry in output_history:
229
if not isinstance(entry, dict):
230
continue
231
output = entry.get("output")
232
if not output:
233
continue
234
start_time = entry.get("start_time")
235
if isinstance(start_time, (int, float)):
236
start_label = time.strftime(
237
"%Y-%m-%d %H:%M:%S", time.localtime(start_time)
238
)
239
formatted.append(f"--- Previous run started: {start_label} ---")
240
else:
241
formatted.append("--- Previous run ---")
242
formatted.append(output.rstrip())
243
return "\n".join(formatted).rstrip()
244
245
def _confirm_session_overwrite(self, session_file: str) -> None:
246
interface.in_line(
247
f"Resume session from {session_file}. Overwrite on save? [o]verwrite/[n]ew: "
248
)
249
choice = input().strip().lower()
250
if choice == "n":
251
options["session_file"] = None
252
253
def _export(self, session_file: str) -> None:
254
# Save written output
255
last_output = interface.buffer.rstrip()
256
session_file = format_session_path(session_file)
257
parent_dir = FileUtils.parent(session_file)
258
if parent_dir:
259
FileUtils.create_dir(parent_dir)
260
261
session_store = SessionStore(options)
262
session_store.save(self, session_file, last_output)
263
264
265
    def setup(self) -> None:
        """Initialize scan state for a fresh (non-resumed) session.

        Loads blacklists, parses a raw request file or applies default
        headers, builds the wordlist dictionary, resets counters, enables
        logging, prints the banner/config, and creates the report manager.
        Exits the process on any fatal configuration error.
        """
        blacklists.update(get_blacklists())

        if options["raw_file"]:
            try:
                # A raw HTTP request file supplies URL, method, headers
                # and body all at once.
                options.update(
                    zip(
                        ["urls", "http_method", "headers", "data"],
                        parse_raw(options["raw_file"]),
                    )
                )
            except InvalidRawRequest as e:
                print(str(e))
                sys.exit(1)
        else:
            # User-supplied headers take precedence over the defaults.
            options["headers"] = {**DEFAULT_HEADERS, **options["headers"]}

        self.dictionary = Dictionary(files=options["wordlists"])
        self.start_time = time.time()
        self.passed_urls: set[str] = set()   # URLs already queued (dedup)
        self.directories: list[str] = []     # per-target recursion queue
        self.jobs_processed = 0
        self.errors = 0
        self.consecutive_errors = 0

        if options["log_file"]:
            try:
                FileUtils.create_dir(FileUtils.parent(options["log_file"]))
                if not FileUtils.can_write(options["log_file"]):
                    raise Exception

                enable_logging()

            except Exception:
                interface.error(
                    f'Couldn\'t create log file at {options["log_file"]}'
                )
                sys.exit(1)

        interface.header(BANNER)
        interface.config(len(self.dictionary))

        try:
            self.reporter = ReportManager(options["output_formats"])
        except (
            InvalidURLException,
            mysql.connector.Error,
            psycopg.Error,
        ) as e:
            logger.exception(e)
            interface.error(str(e))
            sys.exit(1)

        if options["log_file"]:
            interface.log_file(options["log_file"])
    def run(self) -> None:
        """Main loop: scan every target URL in options["urls"].

        Selects sync or async requester/fuzzer implementations, installs
        SIGINT/SIGTERM handlers that open the pause menu, then scans each
        target while handling skip/quit interrupts. Deletes the session
        file once all targets are finished.
        """
        if options["async_mode"]:
            from lib.connection.requester import AsyncRequester as Requester
            from lib.core.fuzzer import AsyncFuzzer as Fuzzer

            try:
                # uvloop is optional; fall back to the default event loop.
                import uvloop
                asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
            except ImportError:
                pass
        else:
            from lib.connection.requester import Requester
            from lib.core.fuzzer import Fuzzer

        # match_callbacks and not_found_callbacks callback values:
        # - *args[0]: lib.connection.Response() object
        #
        # error_callbacks callback values:
        # - *args[0]: exception
        match_callbacks = (
            self.match_callback, self.reporter.save, self.reset_consecutive_errors
        )
        not_found_callbacks = (
            self.update_progress_bar, self.reset_consecutive_errors
        )
        error_callbacks = (self.raise_error, self.append_error_log)

        self.requester = Requester()
        if options["async_mode"]:
            self.loop = asyncio.new_event_loop()

        # Route Ctrl+C / SIGTERM through the interactive pause menu.
        signal.signal(signal.SIGINT, lambda *_: self.handle_pause())
        signal.signal(signal.SIGTERM, lambda *_: self.handle_pause())

        while options["urls"]:
            url = options["urls"][0]
            self.fuzzer = Fuzzer(
                self.requester,
                self.dictionary,
                match_callbacks=match_callbacks,
                not_found_callbacks=not_found_callbacks,
                error_callbacks=error_callbacks,
            )

            try:
                self.set_target(url)

                # An empty queue means a fresh target (not resumed mid-scan).
                if not self.directories:
                    for subdir in options["subdirs"]:
                        self.add_directory(self.base_path + subdir)

                if not self.old_session:
                    interface.target(self.url)

                self.reporter.prepare(self.url)
                self.start()

            except (
                CannotConnectException,
                FileExistsException,
                InvalidURLException,
                RequestException,
                SkipTargetInterrupt,
                KeyboardInterrupt,
            ) as e:
                # Move on to the next target, resetting per-target state.
                self.directories.clear()
                self.dictionary.reset()

                if e.args:
                    interface.error(str(e))

            except QuitInterrupt as e:
                self.reporter.finish()
                interface.error(e.args[0])
                sys.exit(0)

            finally:
                options["urls"].pop(0)

        interface.warning("\nTask Completed")
        self.reporter.finish()

        # The scan completed, so any saved session is no longer needed.
        if options["session_file"]:
            try:
                if os.path.isdir(options["session_file"]):
                    shutil.rmtree(options["session_file"])
                else:
                    os.remove(options["session_file"])
            except Exception:
                interface.error("Failed to delete old session file, remove it to free some space")
    def start(self) -> None:
        """Scan every queued directory of the current target."""
        start_time = time.time()

        while self.directories:
            try:
                gc.collect()

                current_directory = self.directories[0]

                if not self.old_session:
                    current_time = time.strftime("%H:%M:%S")
                    msg = f"{NEW_LINE}[{current_time}] Scanning: {current_directory}"

                    interface.warning(msg)

                self.fuzzer.set_base_path(current_directory)
                if options["async_mode"]:
                    # use a future to get exceptions from handle_pause
                    # https://stackoverflow.com/a/64230941
                    self.pause_future = self.loop.create_future()
                    self.loop.run_until_complete(self.start_coroutines(start_time))
                else:
                    self.fuzzer.start()
                    self.process(start_time)

            except (KeyboardInterrupt, asyncio.CancelledError):
                pass

            finally:
                self.dictionary.reset()
                self.directories.pop(0)

                self.jobs_processed += 1
                # After the first directory, any resumed-session state
                # has been consumed.
                self.old_session = False
async def start_coroutines(self, start_time: float) -> None:
448
task = self.loop.create_task(self.fuzzer.start())
449
timeout = min(
450
t for t in [
451
options["max_time"] - (time.time() - self.start_time),
452
options["target_max_time"] - (time.time() - start_time),
453
] if t > 0
454
) if options["max_time"] or options["target_max_time"] else None
455
456
try:
457
await asyncio.wait_for(
458
asyncio.wait(
459
[self.pause_future, task],
460
return_when=asyncio.FIRST_COMPLETED,
461
),
462
timeout=timeout,
463
)
464
except asyncio.TimeoutError:
465
if time.time() - self.start_time > options["max_time"] > 0:
466
raise QuitInterrupt("Runtime exceeded the maximum set by the user")
467
468
raise SkipTargetInterrupt("Runtime for target exceeded the maximum set by the user")
469
470
if self.pause_future.done():
471
task.cancel()
472
await self.pause_future # propagate the exception, if raised
473
474
await task # propagate the exception, if raised
475
476
def process(self, start_time: float) -> None:
477
while True:
478
while not self.fuzzer.is_finished():
479
now = time.time()
480
if now - self.start_time > options["max_time"] > 0:
481
raise QuitInterrupt(
482
"Runtime exceeded the maximum set by the user"
483
)
484
if now - start_time > options["target_max_time"] > 0:
485
raise SkipTargetInterrupt(
486
"Runtime for target exceeded the maximum set by the user"
487
)
488
489
time.sleep(0.5)
490
491
break
492
493
def set_target(self, url: str) -> None:
494
# If no scheme specified, unset it first
495
if "://" not in url:
496
url = f'{options["scheme"] or UNKNOWN}://{url}'
497
if not url.endswith("/"):
498
url += "/"
499
500
parsed = urlparse(url)
501
self.base_path = lstrip_once(parsed.path, "/")
502
503
# Credentials in URL
504
if "@" in parsed.netloc:
505
cred, parsed.netloc = parsed.netloc.split("@")
506
self.requester.set_auth("basic", cred)
507
508
if parsed.scheme not in (UNKNOWN, "https", "http"):
509
raise InvalidURLException(f"Unsupported URI scheme: {parsed.scheme}")
510
511
port = parsed.port
512
# If no port is specified, set default (80, 443) based on the scheme
513
if not port:
514
port = STANDARD_PORTS.get(parsed.scheme, None)
515
elif not 0 < port < 65536:
516
raise InvalidURLException(f"Invalid port number: {port}")
517
518
if options["ip"]:
519
cache_dns(parsed.hostname, port, options["ip"])
520
521
try:
522
# If no scheme is found, detect it by port number
523
scheme = (
524
parsed.scheme
525
if parsed.scheme != UNKNOWN
526
else detect_scheme(parsed.hostname, port)
527
)
528
except ValueError:
529
# If the user neither provides the port nor scheme, guess them based
530
# on standard website characteristics
531
scheme = detect_scheme(parsed.hostname, 443)
532
port = STANDARD_PORTS[scheme]
533
534
self.url = f"{scheme}://{parsed.hostname}"
535
536
if port != STANDARD_PORTS[scheme]:
537
self.url += f":{port}"
538
539
self.url += "/"
540
541
self.requester.set_url(self.url)
542
543
    def reset_consecutive_errors(self, response: BaseResponse) -> None:
        """Reset the consecutive-error counter after any completed request."""
        self.consecutive_errors = 0
    def match_callback(self, response: BaseResponse) -> None:
        """Handle a matched response: report, recurse, replay, and crawl.

        Raises:
            SkipTargetInterrupt: when the status code is in the user's
                --skip-on-status list.
        """
        if response.status in options["skip_on_status"]:
            raise SkipTargetInterrupt(
                f"Skipped the target due to {response.status} status code"
            )

        interface.status_report(response, options["full_url"])

        if response.status in options["recursion_status_codes"] and any(
            (
                options["recursive"],
                options["deep_recursive"],
                options["force_recursive"],
            )
        ):
            # Recurse into the redirect destination when one exists,
            # otherwise into the response path itself.
            if response.redirect:
                new_path = clean_path(parse_path(response.redirect))
                added_to_queue = self.recur_for_redirect(response.path, new_path)
            elif len(response.history):
                old_path = clean_path(parse_path(response.history[0]))
                added_to_queue = self.recur_for_redirect(old_path, response.path)
            else:
                added_to_queue = self.recur(response.path)

            if added_to_queue:
                interface.new_directories(added_to_queue)

        if options["replay_proxy"]:
            # Replay the request with new proxy
            if options["async_mode"]:
                self.loop.create_task(self.requester.replay_request(response.full_path, proxy=options["replay_proxy"]))
            else:
                self.requester.request(response.full_path, proxy=options["replay_proxy"])

        if options["crawl"]:
            # Feed crawled paths back into the wordlist for this target.
            for path in Crawler.crawl(response):
                if not self.dictionary.is_valid(path):
                    continue
                path = lstrip_once(path, self.base_path)
                self.dictionary.add_extra(path)
def update_progress_bar(self, response: BaseResponse) -> None:
588
jobs_count = (
589
# Jobs left for unscanned targets
590
len(options["subdirs"]) * (len(options["urls"]) - 1)
591
# Jobs left for the current target
592
+ len(self.directories)
593
# Finished jobs
594
+ self.jobs_processed
595
)
596
597
interface.last_path(
598
self.dictionary.index,
599
len(self.dictionary),
600
self.jobs_processed + 1,
601
jobs_count,
602
self.requester.rate,
603
self.errors,
604
)
605
606
    def raise_error(self, exception: RequestException) -> None:
        """Count a request error and abort per the user's policy.

        Raises:
            QuitInterrupt: when --exit-on-error is enabled.
            SkipTargetInterrupt: after too many consecutive errors.
        """
        if options["exit_on_error"]:
            raise QuitInterrupt("Canceled due to an error")

        self.errors += 1
        self.consecutive_errors += 1

        if self.consecutive_errors > MAX_CONSECUTIVE_REQUEST_ERRORS:
            raise SkipTargetInterrupt("Too many request errors")
    def append_error_log(self, exception: RequestException) -> None:
        """Record the request error (with traceback) in the log."""
        logger.exception(exception)
def _force_exit(self) -> None:
620
"""Force process termination, stopping asyncio loop if running."""
621
interface.warning("\nForce quit!", do_save=False)
622
# Stop asyncio loop first if running (prevents hang in async mode)
623
if self.loop and self.loop.is_running():
624
try:
625
self.loop.stop()
626
except Exception:
627
pass
628
os._exit(1)
629
630
    def handle_pause(self) -> None:
        """Handle SIGINT (Ctrl+C) by pausing execution and showing options.

        Runs inside the signal handler. A second SIGINT while the menu is
        open is routed to the platform force-quit handler. Quit/skip
        choices are raised directly in sync mode, but in async mode they
        are set on self.pause_future so they surface inside the event
        loop rather than in the signal handler.
        """
        if self._handling_pause:
            # Already paused: another Ctrl+C may force quit.
            self._force_quit_handler.check_force_quit()
            return

        self._handling_pause = True
        self._force_quit_handler.on_pause_start()

        try:
            try:
                interface.warning(
                    "CTRL+C detected: Pausing threads, please wait...", do_save=False
                )
                if not self.fuzzer.pause():
                    interface.warning(
                        "Could not pause all threads (some may be blocked on I/O). "
                        "Press CTRL+C again to force quit.",
                        do_save=False
                    )
            except Exception:
                # If pause fails for any reason, still show the menu
                pass

            # Interactive menu loop; repeats until a valid choice is made.
            while True:
                msg = "[q]uit / [c]ontinue"

                if len(self.directories) > 1:
                    msg += " / [n]ext"

                if len(options["urls"]) > 1:
                    msg += " / [s]kip target"

                interface.in_line(msg + ": ")

                option = input()

                if option.lower() == "q":
                    interface.in_line("[s]ave / [q]uit without saving: ")

                    option = input()

                    if option.lower() == "s":
                        default_session_path = format_session_path(
                            options["session_file"] or DEFAULT_SESSION_FILE
                        )
                        msg = f"Save to file [{default_session_path}]: "

                        interface.in_line(msg)

                        session_file = format_session_path(input() or default_session_path)

                        self._export(session_file)
                        quitexc = QuitInterrupt(f"Session saved to: {session_file}")
                        if options["async_mode"]:
                            self.pause_future.set_exception(quitexc)
                            break
                        else:
                            raise quitexc
                    elif option.lower() == "q":
                        quitexc = QuitInterrupt("Canceled by the user")
                        if options["async_mode"]:
                            self.pause_future.set_exception(quitexc)
                            break
                        else:
                            raise quitexc

                elif option.lower() == "c":
                    # Resume: clear pause state before restarting threads.
                    self._handling_pause = False
                    self._force_quit_handler.on_resume()
                    self.fuzzer.play()
                    break

                elif option.lower() == "n" and len(self.directories) > 1:
                    # Abort the current directory; start() moves to the next.
                    self.fuzzer.quit()
                    break

                elif option.lower() == "s" and len(options["urls"]) > 1:
                    skipexc = SkipTargetInterrupt("Target skipped by the user")
                    if options["async_mode"]:
                        self.pause_future.set_exception(skipexc)
                        break
                    else:
                        raise skipexc
        finally:
            pass
def add_directory(self, path: str) -> None:
718
"""Add directory to the recursion queue"""
719
720
# Pass if path is in exclusive directories
721
if any(
722
path.startswith(dir) or "/" + dir in path
723
for dir in options["exclude_subdirs"]
724
):
725
return
726
727
url = self.url + path
728
729
if (
730
path.count("/") - self.base_path.count("/") > options["recursion_depth"] > 0
731
or url in self.passed_urls
732
):
733
return
734
735
self.directories.append(path)
736
self.passed_urls.add(url)
737
738
@locked
739
def recur(self, path: str) -> list[str]:
740
dirs_count = len(self.directories)
741
path = clean_path(path)
742
743
if options["force_recursive"] and not path.endswith("/"):
744
path += "/"
745
746
if options["deep_recursive"]:
747
i = 0
748
for _ in range(path.count("/")):
749
i = path.index("/", i) + 1
750
self.add_directory(path[:i])
751
elif (
752
options["recursive"]
753
and path.endswith("/")
754
and re.search(EXTENSION_RECOGNITION_REGEX, path[:-1]) is None
755
):
756
self.add_directory(path)
757
758
# Return newly added directories
759
return self.directories[dirs_count:]
760
761
def recur_for_redirect(self, path: str, redirect_path: str) -> list[str]:
762
if redirect_path == path + "/":
763
return self.recur(redirect_path)
764
765
return []
766
767