Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
1N3
GitHub Repository: 1N3/Sn1per
Path: blob/master/bin/github-subdomains.py
2960 views
1
#!/usr/bin/python3.5
2
3
# I don't believe in license.
4
# You can do whatever you want with this program.
5
6
import os
7
import sys
8
import re
9
import time
10
import requests
11
import random
12
import argparse
13
from functools import partial
14
from colored import fg, bg, attr
15
from multiprocessing.dummy import Pool
16
17
18
# Fallback location for GitHub API tokens: a ".tokens" file sitting in the
# same directory as this script (the script path is resolved with realpath
# first, so symlinks to the script are followed).
TOKENS_FILE = '%s/.tokens' % os.path.dirname( os.path.realpath(__file__) )
19
20
21
def githubApiSearchCode( search, page ):
    """Run one page of a GitHub code search and return the decoded JSON.

    Args:
        search: the search expression sent as the ``q`` parameter.
        page: 1-based page number of the result set to fetch.

    Returns:
        The decoded JSON body of the response, or ``False`` when the request
        or the JSON decoding failed (the error is printed in red).
    """
    # One of the configured tokens is picked at random for every request.
    headers = {"Authorization": "token " + random.choice(t_tokens)}

    # Hand the query to requests as parameters instead of concatenating it
    # into the URL, so characters such as '&', '#' or '+' inside the search
    # expression are percent-encoded rather than corrupting the query string.
    params = [
        ('s', 'indexed'),
        ('type', 'Code'),
        ('o', 'desc'),
        ('q', search),
        ('page', str(page)),
    ]

    try:
        r = requests.get(
            'https://api.github.com/search/code',
            params=params,
            headers=headers,
            timeout=5,
        )
        return r.json()
    except Exception as e:
        # Best effort: any network or decoding failure is reported and
        # signalled to the caller with False instead of aborting the run.
        print( "%s[-] error occurred: %s%s" % (fg('red'),e,attr(0)) )
        return False
33
34
35
def getRawUrl( result ):
    """Translate a search result's ``html_url`` into its raw-content URL.

    ``https://github.com/<owner>/<repo>/blob/<ref>/<path>`` becomes
    ``https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>``.

    Args:
        result: a mapping holding the GitHub page URL under ``'html_url'``.

    Returns:
        The URL of the raw file content as a string.
    """
    github_prefix = 'https://github.com/'
    raw_prefix = 'https://raw.githubusercontent.com/'
    html_url = result['html_url']

    if html_url.startswith( github_prefix ):
        # Expected layout after the prefix: owner / repo / "blob" / ref+path.
        parts = html_url[len(github_prefix):].split( '/', 3 )
        if len(parts) == 4 and parts[2] == 'blob':
            # Drop only the structural "blob" segment; an owner, repository
            # or directory that happens to be called "blob" is left intact.
            return raw_prefix + '/'.join( (parts[0], parts[1], parts[3]) )

    # Unexpected shape: fall back to plain substitution, but only on the
    # first occurrence so later path segments are never rewritten.
    raw_url = html_url.replace( github_prefix, raw_prefix, 1 )
    return raw_url.replace( '/blob/', '/', 1 )
40
41
42
def readCode( regexp, source, result ):
    """Download one search result and print every new subdomain found in it.

    Args:
        regexp: pattern applied with ``re.findall``; it must contain at least
            one group, the first (outermost) one being the full host name,
            because ``findall`` then yields tuples and element 0 is used.
        source: when truthy, the result's ``html_url`` is appended to each
            printed host name.
        result: one item of the search response; its ``'html_url'`` is used
            to locate the raw file and, optionally, for display.

    Side effects:
        Appends every newly seen host name to the module-level ``t_history``
        list and writes one line per new host name to stdout.
    """
    code = doGetCode( getRawUrl( result ) )
    if not code:
        # Download failed or the file is empty: nothing to scan.
        return

    for match in re.findall( regexp, code ):
        # NOTE(review): the '2F' removal presumably strips what is left of a
        # URL-encoded slash ("%2F") in front of a host name - confirm. It is
        # case-sensitive and applies anywhere in the match.
        sub = match[0].replace('2F','').lower().strip()
        if not sub or sub in t_history:
            continue
        t_history.append( sub )

        # Build the whole line first and emit it with a single write: this
        # function runs concurrently in a thread pool, and separate writes
        # for host, source and newline can interleave between workers.
        line = "%s" % sub
        if source:
            line += "\t-> %s" % result['html_url']
        sys.stdout.write( line + "\n" )
59
60
61
def doGetCode( url ):
    """Fetch ``url`` and hand back the response body as text.

    Args:
        url: address to download (5 second timeout).

    Returns:
        The body of the response as a string, or ``False`` when the request
        raised; in that case the error is written to stdout in red.
    """
    try:
        response = requests.get( url, timeout=5 )
    except Exception as err:
        message = "%s[-] error occurred: %s%s\n" % (fg('red'),err,attr(0))
        sys.stdout.write( message )
        return False
    else:
        return response.text
70
71
72
# ---------------------------------------------------------------------------
# Script body: parse the command line, load the API tokens, build the search
# expression and the extraction regexp, then walk the search result pages and
# scan every returned file for subdomains.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument( "-t","--token",help="auth token (required)" )
parser.add_argument( "-d","--domain",help="domain you are looking for (required)" )
parser.add_argument( "-e","--extend",help="also look for <dummy>example.com", action="store_true" )
parser.add_argument( "-s","--source",help="display first url where subdomains are found", action="store_true" )
args = parser.parse_args()

# Tokens come from -t (comma separated) or, failing that, from TOKENS_FILE
# (one token per line).
t_tokens = []
if args.token:
    t_tokens = args.token.split(',')
elif os.path.isfile(TOKENS_FILE):
    with open(TOKENS_FILE,'r') as fp:
        t_tokens = fp.read().split("\n")

# Discard blank entries (a trailing newline in the tokens file, "a,,b" on the
# command line) and surrounding whitespace: an empty token could otherwise be
# picked at random, fail authentication and end the search prematurely.
t_tokens = [token.strip() for token in t_tokens if token.strip()]

if not t_tokens:
    parser.error( 'auth token is missing' )

_source = bool( args.source )

if not args.domain:
    parser.error( 'domain is missing' )
_domain = args.domain

# Host names already printed; shared with readCode() to avoid duplicates.
t_history = []
page = 1

### this is a test, looks like we get more results that way:
### search for the bare registered name ("example") instead of the full
### domain ("example.com").
import tldextract
t_host_parse = tldextract.extract( _domain )
_search = '"' + t_host_parse.domain + '"'
###

# egrep -io "[0-9a-z_\-\.]+\.([0-9a-z_\-]+)?`echo $h|awk -F '.' '{print $(NF-1)}'`([0-9a-z_\-\.]+)?\.[a-z]{1,5}"

# Both patterns keep the full host name in the first (outermost) group, which
# is the element readCode() picks from each findall() tuple. All literals are
# raw strings and the user supplied parts are escaped, so '.' and other regex
# metacharacters in the domain are matched literally.
if args.extend:
    _regexp = ( r'([0-9a-z_\-\.]+\.([0-9a-z_\-]+)?'
                + re.escape( t_host_parse.domain )
                + r'([0-9a-z_\-\.]+)?\.[a-z]{1,5})' )
else:
    _regexp = r'(([0-9a-zA-Z_\-\.]+)\.' + re.escape( _domain ) + ')'

# A single pool of worker threads is reused for every result page.
pool = Pool( 30 )
try:
    while True:
        # Pause between search requests (presumably to stay under the API
        # rate limit - confirm against the GitHub documentation).
        time.sleep( 1 )
        t_json = githubApiSearchCode( _search, page )
        page = page + 1

        # Stop on a failed request, on a response carrying a
        # 'documentation_url' key (taken here as an API error payload), or
        # when there are no more items.
        if not t_json or 'documentation_url' in t_json or 'items' not in t_json or not t_json['items']:
            break

        pool.map( partial(readCode,_regexp,_source), t_json['items'] )
finally:
    pool.close()
    pool.join()
138
139