CoCalc -- github-subdomains.py

GitHub Repository: 1N3/Sn1per
Path: blob/master/bin/github-subdomains.py
2960 views
1
#!/usr/bin/python3.5
2

3
# I don't believe in license.
4
# You can do whatever you want with this program.
5

6
import os
7
import sys
8
import re
9
import time
10
import requests
11
import random
12
import argparse
13
from functools import partial
14
from colored import fg, bg, attr
15
from multiprocessing.dummy import Pool
16

17

18
# Fallback token store: a `.tokens` file located in the same directory as this
# script (symlinks resolved by realpath). It is only consulted when no
# -t/--token option is given on the command line.
TOKENS_FILE = os.path.dirname(os.path.realpath(__file__))+'/.tokens'
19

20

21
def githubApiSearchCode( search, page ):
    """Request one page of results from the GitHub code-search API.

    Args:
        search: Search expression, sent as the ``q`` query parameter.
        page: Result page number to fetch.

    Returns:
        The decoded JSON body of the response, or False when the request or
        the JSON decoding raised an exception (the error is printed).
    """
    # t_tokens is a module-level list; one token is picked at random per call.
    headers = {"Authorization": "token " + random.choice(t_tokens)}

    # Hand the query to requests as `params` so every value is URL-encoded.
    # Concatenating `search` into the URL by hand would let characters such
    # as '&', '#', '+' or spaces corrupt the query string.
    params = {
        's': 'indexed',
        'type': 'Code',
        'o': 'desc',
        'q': search,
        'page': page,
    }

    try:
        r = requests.get( 'https://api.github.com/search/code', params=params, headers=headers, timeout=5 )
        return r.json()
    except Exception as e:
        # Deliberately broad: any failure is reported and signalled to the
        # caller with False instead of aborting the script.
        print( "%s[-] error occurred: %s%s" % (fg('red'),e,attr(0)) )
        return False
33

34

35
def getRawUrl( result ):
    """Translate a search hit's GitHub page URL into its raw-content URL.

    Args:
        result: Mapping with an 'html_url' entry, normally of the form
            https://github.com/<owner>/<repo>/blob/<ref>/<path>.

    Returns:
        The matching https://raw.githubusercontent.com/ URL as a string.

    Raises:
        KeyError: if *result* has no 'html_url' key.
    """
    github_prefix = 'https://github.com/'
    raw_prefix = 'https://raw.githubusercontent.com/'
    html_url = result['html_url']

    if html_url.startswith( github_prefix ):
        # Split into owner / repo / 'blob' / remainder and drop only that one
        # 'blob' marker. A blanket replace of '/blob/' would also strip a
        # directory (or repository) that happens to be named "blob".
        parts = html_url[len(github_prefix):].split( '/', 3 )
        if len(parts) == 4 and parts[2] == 'blob':
            owner, repo, _, remainder = parts
            return raw_prefix + '/'.join( (owner, repo, remainder) )

    # Unexpected URL shape: keep the historical plain substitution.
    return html_url.replace( github_prefix, raw_prefix ).replace( '/blob/', '/' )
40

41

42
def readCode( regexp, source, result ):
    """Download one search hit and print each not-yet-seen match of *regexp*.

    Args:
        regexp: Pattern handed to re.findall. It needs two or more capturing
            groups so that findall yields tuples; the first group is taken
            as the hostname.
        source: When truthy, the hit's 'html_url' is appended to each line.
        result: One search-result item; must provide 'html_url'.

    Matches already recorded in the module-level list t_history are skipped;
    new ones are appended to it and written to stdout.
    """
    code = doGetCode( getRawUrl(result) )
    if not code:
        return

    for match in re.findall( regexp, code ):
        # '2F' is stripped before lower-casing; presumably it is the residue
        # of a URL-encoded '/' (%2F) glued to the hostname - TODO confirm.
        sub = match[0].replace('2F','').lower().strip()
        if not sub or sub in t_history:
            continue
        t_history.append( sub )

        # This function runs in a thread pool, so emit the whole line with a
        # single write; separate writes let lines from different threads
        # interleave on stdout.
        line = sub
        if source:
            line += "\t-> %s" % result['html_url']
        sys.stdout.write( line + "\n" )
59

60

61
def doGetCode( url ):
    """Fetch *url* with a 5 second timeout and return the response text.

    If the request raises, the error is written to stdout and False is
    returned instead.
    """
    try:
        response = requests.get( url, timeout=5 )
    except Exception as err:
        message = "%s[-] error occurred: %s%s\n" % (fg('red'),err,attr(0))
        sys.stdout.write( message )
        return False
    else:
        return response.text
70

71

72
# Command-line interface.
parser = argparse.ArgumentParser()
parser.add_argument( "-t","--token",help="auth token (required)" )
parser.add_argument( "-d","--domain",help="domain you are looking for (required)" )
parser.add_argument( "-e","--extend",help="also look for <dummy>example.com", action="store_true" )
parser.add_argument( "-s","--source",help="display first url where subdomains are found", action="store_true" )
args = parser.parse_args()

# Tokens come from -t (comma separated) or, failing that, from TOKENS_FILE
# (one per line). Blank entries are dropped: a trailing newline in the file
# would otherwise yield an empty token that random.choice() could select,
# producing an invalid Authorization header.
t_tokens = []
if args.token:
    t_tokens = [tok.strip() for tok in args.token.split(',') if tok.strip()]
elif os.path.isfile(TOKENS_FILE):
    with open(TOKENS_FILE,'r') as fp:
        t_tokens = [line.strip() for line in fp if line.strip()]

if not t_tokens:
    parser.error( 'auth token is missing' )

# Whether to print the URL in which each subdomain was found.
_source = bool(args.source)

if args.domain:
    _domain = args.domain
else:
    parser.error( 'domain is missing' )
101

102
# Subdomains already printed (shared with readCode) and the next result page.
t_history = []
page = 1

### this is a test, looks like we got more result that way
# Search for the registered name only (e.g. "example" for example.com)
# rather than the full domain.
import tldextract
t_host_parse = tldextract.extract( _domain )
_search = '"' + t_host_parse.domain + '"'
###

# egrep -io "[0-9a-z_\-\.]+\.([0-9a-z_\-]+)?`echo $h|awk -F '.' '{print $(NF-1)}'`([0-9a-z_\-\.]+)?\.[a-z]{1,5}"

# Both patterns keep the whole hostname in the first capturing group, which
# is what readCode reads from each match. Raw strings and re.escape replace
# the hand-written '\.' escapes, which are invalid escape sequences in a
# normal string literal.
if args.extend:
    _regexp = r'([0-9a-z_\-\.]+\.([0-9a-z_\-]+)?' + re.escape(t_host_parse.domain) + r'([0-9a-z_\-\.]+)?\.[a-z]{1,5})'
else:
    _regexp = r'(([0-9a-zA-Z_\-\.]+)\.' + re.escape(_domain) + r')'
123

124
# for page in range(1,10):
125
# One pool of 30 worker threads is reused for every result page instead of
# being rebuilt per page, and it is always shut down, even if a worker raises.
pool = Pool( 30 )
try:
    while True:
        # Pause between search requests (presumably to stay under the API
        # rate limit - TODO confirm).
        time.sleep( 1 )
        t_json = githubApiSearchCode( _search, page )
        page = page + 1

        # Stop on a failed request, on a response carrying
        # 'documentation_url' (assumed to mark an API error payload), or
        # when there are no more items.
        if not t_json or 'documentation_url' in t_json or 'items' not in t_json or not t_json['items']:
            break

        # Fetch and scan every hit of this page concurrently.
        pool.map( partial(readCode,_regexp,_source), t_json['items'] )
finally:
    pool.close()
    pool.join()
138

139
Product

Resources

Company