Path: blob/master/sherlock_project/sherlock.py
#! /usr/bin/env python3
"""
Sherlock: Find Usernames Across Social Networks Module

This module contains the main logic to search for usernames at social
networks.
"""

import sys

try:
    from sherlock_project.__init__ import import_error_test_var  # noqa: F401
except ImportError:
    print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?")
    print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up-to-date instructions.")
    sys.exit(1)

import csv
import signal
import pandas as pd
import os
import re
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from json import loads as json_loads
from time import monotonic
from typing import Optional

import requests
from requests_futures.sessions import FuturesSession

from sherlock_project.__init__ import (
    __longname__,
    __shortname__,
    __version__,
    forge_api_latest_release,
)

from sherlock_project.result import QueryStatus
from sherlock_project.result import QueryResult
from sherlock_project.notify import QueryNotify
from sherlock_project.notify import QueryNotifyPrint
from sherlock_project.sites import SitesInformation
from colorama import init
from argparse import ArgumentTypeError


class SherlockFuturesSession(FuturesSession):
    def request(self, method, url, hooks=None, *args, **kwargs):
        """Request URL.

        This extends the FuturesSession request method to calculate a response
        time metric for each request.

        It is taken (almost) directly from the requests-futures documentation:
        https://github.com/ross/requests-futures#working-in-the-background

        Keyword Arguments:
        self                   -- This object.
        method                 -- String containing method desired for request.
        url                    -- String containing URL for request.
        hooks                  -- Dictionary containing hooks to execute after
                                  request finishes.
        args                   -- Arguments.
        kwargs                 -- Keyword arguments.

        Return Value:
        Request object.
        """
        # Record the start time for the request.
        if hooks is None:
            hooks = {}
        start = monotonic()

        def response_time(resp, *args, **kwargs):
            """Response Time Hook.

            Keyword Arguments:
            resp                   -- Response object.
            args                   -- Arguments.
            kwargs                 -- Keyword arguments.

            Return Value:
            Nothing.
            """
            resp.elapsed = monotonic() - start

            return

        # Install hook to execute when response completes.
        # Make sure that the time measurement hook is first, so we will not
        # track any later hook's execution time.
        try:
            if isinstance(hooks["response"], list):
                hooks["response"].insert(0, response_time)
            elif isinstance(hooks["response"], tuple):
                # Convert tuple to list and insert time measurement hook first.
                hooks["response"] = list(hooks["response"])
                hooks["response"].insert(0, response_time)
            else:
                # Must have previously contained a single hook function,
                # so convert to list.
                hooks["response"] = [response_time, hooks["response"]]
        except KeyError:
            # No response hook was already defined, so install it ourselves.
            hooks["response"] = [response_time]

        return super(SherlockFuturesSession, self).request(
            method, url, hooks=hooks, *args, **kwargs
        )


def get_response(request_future, error_type, social_network):
    # Default for Response object if some failure occurs.
    response = None

    error_context = "General Unknown Error"
    exception_text = None
    try:
        response = request_future.result()
        if response.status_code:
            # Status code exists in response object
            error_context = None
    except requests.exceptions.HTTPError as errh:
        error_context = "HTTP Error"
        exception_text = str(errh)
    except requests.exceptions.ProxyError as errp:
        error_context = "Proxy Error"
        exception_text = str(errp)
    except requests.exceptions.ConnectionError as errc:
        error_context = "Error Connecting"
        exception_text = str(errc)
    except requests.exceptions.Timeout as errt:
        error_context = "Timeout Error"
        exception_text = str(errt)
    except requests.exceptions.RequestException as err:
        error_context = "Unknown Error"
        exception_text = str(err)

    return response, error_context, exception_text


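# Example of interpolate_string() behavior (values below are placeholders, not
# real site data): the username replaces every "{}" placeholder, recursing
# through dicts and lists.
#     interpolate_string("https://example.com/{}", "alice")
#         -> "https://example.com/alice"
#     interpolate_string({"q": "{}"}, "alice") -> {"q": "alice"}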
def interpolate_string(input_object, username):
    if isinstance(input_object, str):
        return input_object.replace("{}", username)
    elif isinstance(input_object, dict):
        return {k: interpolate_string(v, username) for k, v in input_object.items()}
    elif isinstance(input_object, list):
        return [interpolate_string(i, username) for i in input_object]
    return input_object


def check_for_parameter(username):
    """Check if the "{?}" parameter exists in the username.

    If it does, Sherlock is being asked to check multiple username variants.
    """
    return "{?}" in username


checksymbols = ["_", "-", "."]


def multiple_usernames(username):
    """Replace the "{?}" parameter with each symbol and return a list of usernames."""
    allUsernames = []
    for i in checksymbols:
        allUsernames.append(username.replace("{?}", i))
    return allUsernames


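# Illustrative expansion of the "{?}" parameter handled by the two helpers
# above (the username is a placeholder):
#     multiple_usernames("john{?}doe") -> ["john_doe", "john-doe", "john.doe"]

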
def sherlock(
    username: str,
    site_data: dict[str, dict[str, str]],
    query_notify: QueryNotify,
    dump_response: bool = False,
    proxy: Optional[str] = None,
    timeout: int = 60,
) -> dict[str, dict[str, str | QueryResult]]:
    """Run Sherlock Analysis.

    Checks for existence of username on various social media sites.

    Keyword Arguments:
    username               -- String indicating username that report
                              should be created against.
    site_data              -- Dictionary containing all of the site data.
    query_notify           -- Object with base type of QueryNotify().
                              This will be used to notify the caller about
                              query results.
    dump_response          -- Boolean indicating whether to dump the full HTTP
                              response to stdout for debugging (default False).
    proxy                  -- String indicating the proxy URL.
    timeout                -- Time in seconds to wait before timing out request.
                              Default is 60 seconds.

    Return Value:
    Dictionary containing results from report. Key of dictionary is the name
    of the social network site, and the value is another dictionary with
    the following keys:
        url_main:      URL of main site.
        url_user:      URL of user on site (if account exists).
        status:        QueryResult() object indicating results of test for
                       account existence.
        http_status:   HTTP status code of query which checked for existence on
                       site.
        response_text: Text that came back from request. May be None if
                       there was an HTTP error when checking for existence.
    """

    # Notify caller that we are starting the query.
    query_notify.start(username)

    # Underlying plain requests session, shared by all futures.
    underlying_session = requests.session()

    # Limit number of workers to 20.
    # This is probably vastly overkill.
    if len(site_data) >= 20:
        max_workers = 20
    else:
        max_workers = len(site_data)

    # Create multi-threaded session for all requests.
    session = SherlockFuturesSession(
        max_workers=max_workers, session=underlying_session
    )

    # Results from analysis of all sites
    results_total = {}

    # First create futures for all requests. This allows the requests to run
    # in parallel.
    for social_network, net_info in site_data.items():
        # Results from analysis of this specific site; record URL of main site.
        results_site = {"url_main": net_info.get("urlMain")}

        # A user agent is needed because some sites don't return the correct
        # information since they think that we are bots (which we actually are...).
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0",
        }

        if "headers" in net_info:
            # Override/append any extra headers required by a given site.
            headers.update(net_info["headers"])

        # URL of user on site (if it exists)
        url = interpolate_string(net_info["url"], username.replace(" ", "%20"))

        # Don't make request if username is invalid for the site.
        regex_check = net_info.get("regexCheck")
        if regex_check and re.search(regex_check, username) is None:
            # No need to do the check at the site: this username is not allowed.
            results_site["status"] = QueryResult(
                username, social_network, url, QueryStatus.ILLEGAL
            )
            results_site["url_user"] = ""
            results_site["http_status"] = ""
            results_site["response_text"] = ""
            query_notify.update(results_site["status"])
        else:
            # URL of user on site (if it exists)
            results_site["url_user"] = url
            url_probe = net_info.get("urlProbe")
            request_method = net_info.get("request_method")
            request_payload = net_info.get("request_payload")
            request = None

            if request_method is not None:
                if request_method == "GET":
                    request = session.get
                elif request_method == "HEAD":
                    request = session.head
                elif request_method == "POST":
                    request = session.post
                elif request_method == "PUT":
                    request = session.put
                else:
                    raise RuntimeError(f"Unsupported request_method for {url}")

            if request_payload is not None:
                request_payload = interpolate_string(request_payload, username)

            if url_probe is None:
                # The probe URL is the normal one seen by people out on the web.
                url_probe = url
            else:
                # There is a special URL for probing existence separate
                # from where the user profile normally can be found.
                url_probe = interpolate_string(url_probe, username)

            if request is None:
                if net_info["errorType"] == "status_code":
                    # In most cases when we are detecting by status code,
                    # it is not necessary to get the entire body: we can
                    # detect fine with just the HEAD response.
                    request = session.head
                else:
                    # Either this detect method needs the content associated
                    # with the GET response, or this specific website will
                    # not respond properly unless we request the whole page.
                    request = session.get

            if net_info["errorType"] == "response_url":
                # Site forwards request to a different URL if username not
                # found. Disallow the redirect so we can capture the
                # HTTP status from the original URL request.
                allow_redirects = False
            else:
                # Allow whatever redirect that the site wants to do.
                # The final result of the request will be what is available.
                allow_redirects = True

            # This future starts the request in a new thread without blocking
            # the main thread.
            if proxy is not None:
                proxies = {"http": proxy, "https": proxy}
                future = request(
                    url=url_probe,
                    headers=headers,
                    proxies=proxies,
                    allow_redirects=allow_redirects,
                    timeout=timeout,
                    json=request_payload,
                )
            else:
                future = request(
                    url=url_probe,
                    headers=headers,
                    allow_redirects=allow_redirects,
                    timeout=timeout,
                    json=request_payload,
                )

            # Store future in data for access later.
            net_info["request_future"] = future

        # Add this site's results into final dictionary with all the other results.
        results_total[social_network] = results_site

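    # At this point every request has been dispatched to the session's thread
    # pool, so the probes run concurrently; the loop below then blocks on one
    # future at a time, in manifest order, to analyze the responses.
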
    # Second pass: analyze the responses as the futures complete.
    for social_network, net_info in site_data.items():
        # Retrieve results again
        results_site = results_total.get(social_network)

        # Retrieve other site information again
        url = results_site.get("url_user")
        status = results_site.get("status")
        if status is not None:
            # We have already determined the user doesn't exist here
            continue

        # Get the expected error type
        error_type = net_info["errorType"]
        if isinstance(error_type, str):
            error_type: list[str] = [error_type]

        # Retrieve future and ensure it has finished
        future = net_info["request_future"]
        r, error_text, exception_text = get_response(
            request_future=future, error_type=error_type, social_network=social_network
        )

        # Get response time for response of our request.
        try:
            response_time = r.elapsed
        except AttributeError:
            response_time = None

        # Attempt to get request information
        try:
            http_status = r.status_code
        except Exception:
            http_status = "?"
        try:
            response_text = r.text.encode(r.encoding or "UTF-8")
        except Exception:
            response_text = ""

        query_status = QueryStatus.UNKNOWN
        error_context = None

        # As WAFs advance and evolve, they will occasionally block Sherlock and
        # lead to false positives and negatives. Fingerprints should be added
        # here to filter results that fail to bypass WAFs. Fingerprints should
        # be highly targeted. Comment at the end of each fingerprint to
        # indicate target and date fingerprinted.
        WAFHitMsgs = [
            r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark',  # 2024-05-13 Cloudflare
            r'<span id="challenge-error-text">',  # 2024-11-11 Cloudflare error page
            r'AwsWafIntegration.forceRefreshToken',  # 2024-11-11 Cloudfront (AWS)
            r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:',  # 2024-04-09 PerimeterX / Human Security
        ]

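        # Detection precedence below: a transport-level error wins, then a WAF
        # fingerprint match, then the manifest's errorType checks ("message",
        # "status_code", "response_url"), each of which may downgrade a
        # tentative CLAIMED to AVAILABLE.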
        if error_text is not None:
            error_context = error_text

        elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
            query_status = QueryStatus.WAF

        else:
            if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
                error_context = f"Unknown error type '{error_type}' for {social_network}"
                query_status = QueryStatus.UNKNOWN
            else:
                if "message" in error_type:
                    # error_flag True denotes no error found in the HTML
                    # error_flag False denotes error found in the HTML
                    error_flag = True
                    # errorMsg from the manifest can be a single string or a
                    # list of strings; handle both cases.
                    errors = net_info.get("errorMsg")
                    if isinstance(errors, str):
                        # If the error message is present in the HTML, set the
                        # flag to False.
                        if errors in r.text:
                            error_flag = False
                    else:
                        # It's a list: check each error message.
                        for error in errors:
                            if error in r.text:
                                error_flag = False
                                break
                    if error_flag:
                        query_status = QueryStatus.CLAIMED
                    else:
                        query_status = QueryStatus.AVAILABLE

                if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
                    error_codes = net_info.get("errorCode")
                    query_status = QueryStatus.CLAIMED

                    # Type consistency, allowing for both single values and
                    # lists in the manifest.
                    if isinstance(error_codes, int):
                        error_codes = [error_codes]

                    if error_codes is not None and r.status_code in error_codes:
                        query_status = QueryStatus.AVAILABLE
                    elif r.status_code >= 300 or r.status_code < 200:
                        query_status = QueryStatus.AVAILABLE

                if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
                    # For this detection method, we have turned off the redirect.
                    # So, there is no need to check the response URL: it will always
                    # match the request. Instead, we will ensure that the response
                    # code indicates that the request was successful (i.e. no 404, or
                    # forward to some odd redirect).
                    if 200 <= r.status_code < 300:
                        query_status = QueryStatus.CLAIMED
                    else:
                        query_status = QueryStatus.AVAILABLE

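        # Status semantics as used in this module: CLAIMED = username appears
        # taken, AVAILABLE = appears free, UNKNOWN = the check errored out,
        # WAF = a firewall blocked the probe, ILLEGAL = username fails the
        # site's regexCheck.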
        if dump_response:
            print("+++++++++++++++++++++")
            print(f"TARGET NAME   : {social_network}")
            print(f"USERNAME      : {username}")
            print(f"TARGET URL    : {url}")
            print(f"TEST METHOD   : {error_type}")
            try:
                print(f"STATUS CODES  : {net_info['errorCode']}")
            except KeyError:
                pass
            print("Results...")
            try:
                print(f"RESPONSE CODE : {r.status_code}")
            except Exception:
                pass
            try:
                print(f"ERROR TEXT    : {net_info['errorMsg']}")
            except KeyError:
                pass
            print(">>>>> BEGIN RESPONSE TEXT")
            try:
                print(r.text)
            except Exception:
                pass
            print("<<<<< END RESPONSE TEXT")
            print("VERDICT : " + str(query_status))
            print("+++++++++++++++++++++")

        # Notify caller about results of query.
        result: QueryResult = QueryResult(
            username=username,
            site_name=social_network,
            site_url_user=url,
            status=query_status,
            query_time=response_time,
            context=error_context,
        )
        query_notify.update(result)

        # Save status of request
        results_site["status"] = result

        # Save results from request
        results_site["http_status"] = http_status
        results_site["response_text"] = response_text

        # Add this site's results into final dictionary with all of the other results.
        results_total[social_network] = results_site

    return results_total


def timeout_check(value):
    """Check Timeout Argument.

    Checks timeout for validity.

    Keyword Arguments:
    value                  -- Time in seconds to wait before timing out request.

    Return Value:
    Floating point number representing the time (in seconds) that should be
    used for the timeout.

    NOTE: Will raise an exception if the timeout is invalid.
    """

    float_value = float(value)

    if float_value <= 0:
        raise ArgumentTypeError(
            f"Invalid timeout value: {value}. Timeout must be a positive number."
        )

    return float_value


def handler(signal_received, frame):
    """Exit gracefully without throwing errors.

    Source: https://www.devdungeon.com/content/python-catch-sigint-ctrl-c
    """
    sys.exit(0)


def main():
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{__longname__} (Version {__version__})",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"{__shortname__} v{__version__}",
        help="Display version information and dependencies.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        "-d",
        "--debug",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra debugging information and metrics.",
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help="If using multiple usernames, the output of the results will be saved to this folder.",
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help="If using a single username, the output of the result will be saved to this file.",
    )
    parser.add_argument(
        "--csv",
        action="store_true",
        dest="csv",
        default=False,
        help="Create Comma-Separated Values (CSV) File.",
    )
    parser.add_argument(
        "--xlsx",
        action="store_true",
        dest="xlsx",
        default=False,
        help="Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).",
    )
    parser.add_argument(
        "--site",
        action="append",
        metavar="SITE_NAME",
        dest="site_list",
        default=[],
        help="Limit analysis to just the listed sites. Add multiple options to specify more than one site.",
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar="PROXY_URL",
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    )
    parser.add_argument(
        "--dump-response",
        action="store_true",
        dest="dump_response",
        default=False,
        help="Dump the HTTP response to stdout for targeted debugging.",
    )
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
    )
    parser.add_argument(
        "--timeout",
        action="store",
        metavar="TIMEOUT",
        dest="timeout",
        type=timeout_check,
        default=60,
        help="Time (in seconds) to wait for response to requests (Default: 60)",
    )
    parser.add_argument(
        "--print-all",
        action="store_true",
        dest="print_all",
        default=False,
        help="Output sites where the username was not found.",
    )
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found",
        default=True,
        help="Output sites where the username was found (also if exported as file).",
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        dest="no_color",
        default=False,
        help="Don't color terminal output.",
    )
    parser.add_argument(
        "username",
        nargs="+",
        metavar="USERNAMES",
        action="store",
        help="One or more usernames to check with social networks. Check similar usernames using {?} (replaced with '_', '-', '.').",
    )
    parser.add_argument(
        "--browse",
        "-b",
        action="store_true",
        dest="browse",
        default=False,
        help="Browse to all results on default browser.",
    )

    parser.add_argument(
        "--local",
        "-l",
        action="store_true",
        default=False,
        help="Force the use of the local data.json file.",
    )

    parser.add_argument(
        "--nsfw",
        action="store_true",
        default=False,
        help="Include checking of NSFW sites from default list.",
    )

    parser.add_argument(
        "--txt",
        action="store_true",
        dest="output_txt",
        default=False,
        help="Enable creation of a txt file.",
    )

    parser.add_argument(
        "--ignore-exclusions",
        action="store_true",
        dest="ignore_exclusions",
        default=False,
        help="Ignore upstream exclusions (may return more false positives).",
    )

    args = parser.parse_args()

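    # Illustrative invocations (usernames are placeholders), assuming the
    # installed `sherlock` entry point:
    #     sherlock someusername
    #     sherlock --site GitHub --site Reddit --csv someusername
    #     sherlock --timeout 10 user{?}name   # {?} expands to _, - and .
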
    # If the user presses CTRL-C, exit gracefully without throwing errors.
    signal.signal(signal.SIGINT, handler)

    # Check for a newer version of Sherlock. If one exists, let the user know about it.
    try:
        latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
        latest_release_json = json_loads(latest_release_raw)
        latest_remote_tag = latest_release_json["tag_name"]

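        # The remote tag presumably looks like "v1.2.3"; the [1:] slice below
        # drops the leading "v" so it can be compared against __version__.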
        if latest_remote_tag[1:] != __version__:
            print(
                f"Update available! {__version__} --> {latest_remote_tag[1:]}"
                f"\n{latest_release_json['html_url']}"
            )

    except Exception as error:
        print(f"A problem occurred while checking for an update: {error}")

    # Print proxy notice and configure terminal color output.
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.no_color:
        # Disable color output.
        init(strip=True, convert=False)
    else:
        # Enable color output.
        init(autoreset=True)

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    # Create object with all information about sites we are aware of.
    try:
        if args.local:
            sites = SitesInformation(
                os.path.join(os.path.dirname(__file__), "resources/data.json"),
                honor_exclusions=False,
            )
        else:
            json_file_location = args.json_file
            if args.json_file:
                # If the --json parameter is a number, interpret it as a pull request number.
                if args.json_file.isnumeric():
                    pull_number = args.json_file
                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
                    pull_request_raw = requests.get(pull_url, timeout=10).text
                    pull_request_json = json_loads(pull_request_raw)

                    # Check if it's a valid pull request
                    if "message" in pull_request_json:
                        print(f"ERROR: Pull request #{pull_number} not found.")
                        sys.exit(1)

                    head_commit_sha = pull_request_json["head"]["sha"]
                    json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"

            sites = SitesInformation(
                data_file_path=json_file_location,
                honor_exclusions=not args.ignore_exclusions,
                do_not_exclude=args.site_list,
            )
    except Exception as error:
        print(f"ERROR: {error}")
        sys.exit(1)

    if not args.nsfw:
        sites.remove_nsfw_sites(do_not_remove=args.site_list)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {site.name: site.information for site in sites}
    if args.site_list == []:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.
        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            counter = 0
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    counter += 1
            if counter == 0:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")

        if not site_data:
            sys.exit(1)

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(
        result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse
    )

    # Run report on all specified users.
    all_usernames = []
    for username in args.username:
        if check_for_parameter(username):
            for name in multiple_usernames(username):
                all_usernames.append(name)
        else:
            all_usernames.append(username)

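    # Each sherlock() call below returns a dict keyed by site name; per its
    # docstring, every value carries url_main, url_user, status (a
    # QueryResult), http_status, and response_text for that site.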
    for username in all_usernames:
        results = sherlock(
            username,
            site_data,
            query_notify,
            dump_response=args.dump_response,
            proxy=args.proxy,
            timeout=args.timeout,
        )

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The results for each username should be stored in a targeted
            # folder. If the folder doesn't exist, create it first.
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        if args.output_txt:
            with open(result_file, "w", encoding="utf-8") as file:
                exists_counter = 0
                for website_name in results:
                    dictionary = results[website_name]
                    if dictionary.get("status").status == QueryStatus.CLAIMED:
                        exists_counter += 1
                        file.write(dictionary["url_user"] + "\n")
                file.write(f"Total Websites Username Detected On : {exists_counter}\n")

        if args.csv:
            result_file = f"{username}.csv"
            if args.folderoutput:
                # The results for each username should be stored in a targeted
                # folder. If the folder doesn't exist, create it first.
                os.makedirs(args.folderoutput, exist_ok=True)
                result_file = os.path.join(args.folderoutput, result_file)

            with open(result_file, "w", newline="", encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow(
                    [
                        "username",
                        "name",
                        "url_main",
                        "url_user",
                        "exists",
                        "http_status",
                        "response_time_s",
                    ]
                )
                for site in results:
                    if (
                        args.print_found
                        and not args.print_all
                        and results[site]["status"].status != QueryStatus.CLAIMED
                    ):
                        continue

                    response_time_s = results[site]["status"].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow(
                        [
                            username,
                            site,
                            results[site]["url_main"],
                            results[site]["url_user"],
                            str(results[site]["status"].status),
                            results[site]["http_status"],
                            response_time_s,
                        ]
                    )
        if args.xlsx:
            usernames = []
            names = []
            url_main = []
            url_user = []
            exists = []
            http_status = []
            response_time_s = []

            for site in results:
                if (
                    args.print_found
                    and not args.print_all
                    and results[site]["status"].status != QueryStatus.CLAIMED
                ):
                    continue

                # Append the per-site response time, using "" when it is
                # unavailable. (Check the value, not the list itself.)
                response_time = results[site]["status"].query_time
                if response_time is None:
                    response_time_s.append("")
                else:
                    response_time_s.append(response_time)
                usernames.append(username)
                names.append(site)
                url_main.append(results[site]["url_main"])
                url_user.append(results[site]["url_user"])
                exists.append(str(results[site]["status"].status))
                http_status.append(results[site]["http_status"])

            DataFrame = pd.DataFrame(
                {
                    "username": usernames,
                    "name": names,
                    "url_main": [f'=HYPERLINK("{u}")' for u in url_main],
                    "url_user": [f'=HYPERLINK("{u}")' for u in url_user],
                    "exists": exists,
                    "http_status": http_status,
                    "response_time_s": response_time_s,
                }
            )
            DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False)

        print()
    query_notify.finish()


if __name__ == "__main__":
    main()