GitHub Repository: epsylon/ufonet
Path: blob/master/core/tools/inspector.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file is part of the UFONet project, https://ufonet.03c8.net

Copyright (c) 2013/2020 | psy <[email protected]>

You should have received a copy of the GNU General Public License along
with UFONet; if not, write to the Free Software Foundation, Inc., 51
Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
import ssl, random, re
import urllib.request, urllib.error
from urllib.parse import urlparse

# Inspector spidering class
class Inspector(object):
    def __init__(self, ufonet):
        self.ufonet = ufonet
        # set initial counters for objects
        self.c_images = 0
        self.c_mov = 0
        self.c_webm = 0
        self.c_avi = 0
        self.c_swf = 0
        self.c_mpg = 0
        self.c_mpeg = 0
        self.c_mp3 = 0
        self.c_mp4 = 0
        self.c_ogg = 0
        self.c_ogv = 0
        self.c_wmv = 0
        self.c_css = 0
        self.c_js = 0
        self.c_xml = 0
        self.c_php = 0
        self.c_html = 0
        self.c_jsp = 0
        self.c_asp = 0
        self.c_txt = 0
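        # NOTE: the three lines below are equivalent to using
        # ssl._create_unverified_context(): certificate and hostname checks
        # are disabled so that hosts with self-signed/broken certs can be reached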
        self.ctx = ssl.create_default_context() # creating context to bypass SSL cert validation (black magic)
        self.ctx.check_hostname = False
        self.ctx.verify_mode = ssl.CERT_NONE

    def proxy_transport(self, proxy):
        proxy_url = self.ufonet.extract_proxy(proxy)
        proxy = urllib.request.ProxyHandler({'https': proxy_url})
        opener = urllib.request.build_opener(proxy)
        urllib.request.install_opener(opener)
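        # NOTE: install_opener() replaces the process-wide default opener, so
        # every urllib.request.urlopen() call made after this point is routed
        # through the configured HTTPS proxy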
    def inspecting(self, target):
        # inspect the sizes of an HTML target's components (ex: http://target.com/foo)
        # [images, .mov, .webm, .avi, .swf, .mpg, .mpeg, .mp3, .mp4, .ogg, .ogv,
        #  .wmv, .css, .js, .xml, .php, .html, .jsp, .asp, .txt]
        biggest_files = {}
        self.ufonet.user_agent = random.choice(self.ufonet.agents).strip() # shuffle user-agent
        headers = {'User-Agent' : self.ufonet.user_agent, 'Referer' : self.ufonet.referer} # set fake user-agent and referer
        try:
            if self.ufonet.options.proxy: # set proxy
                self.proxy_transport(self.ufonet.options.proxy)
            req = urllib.request.Request(target, None, headers)
            target_reply = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
        except:
            print('[Error] [AI] Unable to connect to target -> [Exiting!]\n')
            return
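        # Each block below repeats the same pattern for one file type: collect
        # candidate links with a pair of regexes (single- and double-quoted
        # attributes), fetch each same-origin match to measure its size in
        # bytes, and record the biggest hit of that type in 'biggest_files'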
        try: # search for image files
            regex_img = []
            regex_img1 = "<img src='(.+?)'" # search on target's results using regex with single quotation
            regex_img.append(regex_img1)
            regex_img2 = '<img src="(.+?)"' # search on target's results using regex with double quotation
            regex_img.append(regex_img2)
            #regex_img3 = '<img src=(.+?)>' # search on target's results using regex without quotations
            #regex_img.append(regex_img3)
            for regimg in regex_img:
                pattern_img = re.compile(regimg)
                img_links = re.findall(pattern_img, target_reply)
                imgs = {}
                for img in img_links:
                    if self.ufonet.options.proxy: # set proxy
                        self.proxy_transport(self.ufonet.options.proxy)
                    self.ufonet.user_agent = random.choice(self.ufonet.agents).strip() # shuffle user-agent
                    headers = {'User-Agent' : self.ufonet.user_agent, 'Referer' : self.ufonet.referer} # set fake user-agent and referer
                    try:
                        if img.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            if img.startswith("data:image"):
                                size = 0
                            else:
                                if img.startswith('/'):
                                    img = img.replace("/", "", 1)
                                try:
                                    if self.ufonet.options.proxy: # set proxy
                                        self.proxy_transport(self.ufonet.options.proxy)
                                    req = urllib.request.Request(target_url + img, None, headers)
                                    img_file = urllib.request.urlopen(req, context=self.ctx).read()
                                    print('+Image found: ' + target_url + img.split('"')[0])
                                    size = len(img_file)
                                    print('(Size: ' + str(size) + ' Bytes)')
                                    imgs[img] = int(size)
                                    self.c_images = self.c_images + 1
                                    print('-'*12)
                                except:
                                    size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Image -> [Discarding!]')
                        size = 0
                biggest_image = max(list(imgs.keys()), key=lambda x: imgs[x]) # search/extract biggest image value from dict
                if biggest_image:
                    biggest_files[biggest_image] = imgs[biggest_image] # add biggest image to list
        except: # if no images were found, go to the next type
            pass
        try: # search for .mov files
            regex_mov = []
            regex_mov1 = r"<a href='(.+?\.mov)'" # search on target's results using regex with single quotation
            regex_mov.append(regex_mov1)
            regex_mov2 = r'<a href="(.+?\.mov)"' # search on target's results using regex with double quotation
            regex_mov.append(regex_mov2)
            #regex_mov3 = r'<a href=(.+?\.mov)' # search on target's results using regex without quotations
            #regex_mov.append(regex_mov3)
            for regmov in regex_mov:
                pattern_mov = re.compile(regmov)
                mov_links = re.findall(pattern_mov, target_reply)
                movs = {}
                for mov in mov_links:
                    try:
                        if mov.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mov, None, headers)
                                mov_file = urllib.request.urlopen(req, context=self.ctx).read() # keep raw bytes: media files are not UTF-8 text
                                print('+Video (.mov) found: ' + target_url + mov.split('"')[0])
                                size = len(mov_file)
                                movs[mov] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mov = self.c_mov + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_mov = max(list(movs.keys()), key=lambda x: movs[x]) # search/extract biggest video (.mov) value from dict
                if biggest_mov:
                    biggest_files[biggest_mov] = movs[biggest_mov] # add biggest video (.mov) to list
        except: # if no .mov files were found, go to the next type
            pass
        try: # search for .webm files
            regex_webm = []
            regex_webm1 = r"<a href='(.+?\.webm)'" # search on target's results using regex with single quotation
            regex_webm.append(regex_webm1)
            regex_webm2 = r'<a href="(.+?\.webm)"' # search on target's results using regex with double quotation
            regex_webm.append(regex_webm2)
            #regex_webm3 = r'<a href=(.+?\.webm)' # search on target's results using regex without quotations
            #regex_webm.append(regex_webm3)
            for regwebm in regex_webm:
                pattern_webm = re.compile(regwebm)
                webm_links = re.findall(pattern_webm, target_reply)
                webms = {}
                for webm in webm_links:
                    try:
                        if webm.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + webm, None, headers)
                                webm_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.webm) found: ' + target_url + webm.split('"')[0])
                                size = len(webm_file)
                                webms[webm] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_webm = self.c_webm + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_webm = max(list(webms.keys()), key=lambda x: webms[x]) # search/extract biggest video (.webm) value from dict
                if biggest_webm:
                    biggest_files[biggest_webm] = webms[biggest_webm] # add biggest video (.webm) to list
        except: # if no .webm files were found, go to the next type
            pass
        try: # search for .avi files
            regex_avi = []
            regex_avi1 = r"<a href='(.+?\.avi)'" # search on target's results using regex with single quotation
            regex_avi.append(regex_avi1)
            regex_avi2 = r'<a href="(.+?\.avi)"' # search on target's results using regex with double quotation
            regex_avi.append(regex_avi2)
            #regex_avi3 = r'<a href=(.+?\.avi)' # search on target's results using regex without quotations
            #regex_avi.append(regex_avi3)
            for regavi in regex_avi:
                pattern_avi = re.compile(regavi)
                avi_links = re.findall(pattern_avi, target_reply)
                avis = {}
                for avi in avi_links:
                    try:
                        if avi.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + avi, None, headers)
                                avi_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.avi) found: ' + target_url + avi.split('"')[0])
                                size = len(avi_file)
                                avis[avi] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_avi = self.c_avi + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_avi = max(list(avis.keys()), key=lambda x: avis[x]) # search/extract biggest video (.avi) value from dict
                if biggest_avi:
                    biggest_files[biggest_avi] = avis[biggest_avi] # add biggest video (.avi) to list
        except: # if no .avi files were found, go to the next type
            pass
        try: # search for .swf files
            regex_swf = []
            regex_swf1 = r"<value='(.+?\.swf)'" # search on target's results using regex with single quotation
            regex_swf.append(regex_swf1)
            regex_swf2 = r'<value="(.+?\.swf)"' # search on target's results using regex with double quotation
            regex_swf.append(regex_swf2)
            #regex_swf3 = r'<value=(.+?\.swf)' # search on target's results using regex without quotations
            #regex_swf.append(regex_swf3)
            for regswf in regex_swf:
                pattern_swf = re.compile(regswf)
                swf_links = re.findall(pattern_swf, target_reply)
                swfs = {}
                for swf in swf_links:
                    try:
                        if swf.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + swf, None, headers)
                                swf_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Flash (.swf) found: ' + target_url + swf.split('"')[0])
                                size = len(swf_file)
                                swfs[swf] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_swf = self.c_swf + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Flash -> [Discarding!]')
                        size = 0
                biggest_swf = max(list(swfs.keys()), key=lambda x: swfs[x]) # search/extract biggest flash (.swf) value from dict
                if biggest_swf:
                    biggest_files[biggest_swf] = swfs[biggest_swf] # add biggest flash (.swf) to list
        except: # if no .swf files were found, go to the next type
            pass
        try: # search for .mpg files
            regex_mpg = []
            regex_mpg1 = r"<src='(.+?\.mpg)'" # search on target's results using regex with single quotation
            regex_mpg.append(regex_mpg1)
            regex_mpg2 = r'<src="(.+?\.mpg)"' # search on target's results using regex with double quotation
            regex_mpg.append(regex_mpg2)
            #regex_mpg3 = r'<src=(.+?\.mpg)' # search on target's results using regex without quotations
            #regex_mpg.append(regex_mpg3)
            for regmpg in regex_mpg:
                pattern_mpg = re.compile(regmpg)
                mpg_links = re.findall(pattern_mpg, target_reply)
                mpgs = {}
                for mpg in mpg_links:
                    try:
                        if mpg.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mpg, None, headers)
                                mpg_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.mpg) found: ' + target_url + mpg.split('"')[0])
                                size = len(mpg_file)
                                mpgs[mpg] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mpg = self.c_mpg + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_mpg = max(list(mpgs.keys()), key=lambda x: mpgs[x]) # search/extract biggest video (.mpg) value from dict
                if biggest_mpg:
                    biggest_files[biggest_mpg] = mpgs[biggest_mpg] # add biggest video (.mpg) to list
        except: # if no .mpg files were found, go to the next type
            pass
        try: # search for .mpeg files
            regex_mpeg = []
            regex_mpeg1 = r"<src='(.+?\.mpeg)'" # search on target's results using regex with single quotation
            regex_mpeg.append(regex_mpeg1)
            regex_mpeg2 = r'<src="(.+?\.mpeg)"' # search on target's results using regex with double quotation
            regex_mpeg.append(regex_mpeg2)
            #regex_mpeg3 = r'<src=(.+?\.mpeg)' # search on target's results using regex without quotations
            #regex_mpeg.append(regex_mpeg3)
            for regmpeg in regex_mpeg:
                pattern_mpeg = re.compile(regmpeg)
                mpeg_links = re.findall(pattern_mpeg, target_reply)
                mpegs = {}
                for mpeg in mpeg_links:
                    try:
                        if mpeg.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mpeg, None, headers)
                                mpeg_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.mpeg) found: ' + target_url + mpeg.split('"')[0])
                                size = len(mpeg_file)
                                mpegs[mpeg] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mpeg = self.c_mpeg + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_mpeg = max(list(mpegs.keys()), key=lambda x: mpegs[x]) # search/extract biggest video (.mpeg) value from dict
                if biggest_mpeg:
                    biggest_files[biggest_mpeg] = mpegs[biggest_mpeg] # add biggest video (.mpeg) to list
        except: # if no .mpeg files were found, go to the next type
            pass
        try: # search for .mp3 files
            regex_mp3 = []
            regex_mp31 = r"<src='(.+?\.mp3)'" # search on target's results using regex with single quotation
            regex_mp3.append(regex_mp31)
            regex_mp32 = r'<src="(.+?\.mp3)"' # search on target's results using regex with double quotation
            regex_mp3.append(regex_mp32)
            #regex_mp33 = r'<src=(.+?\.mp3)' # search on target's results using regex without quotations
            #regex_mp3.append(regex_mp33)
            for regmp3 in regex_mp3:
                pattern_mp3 = re.compile(regmp3)
                mp3_links = re.findall(pattern_mp3, target_reply)
                mp3s = {}
                for mp3 in mp3_links:
                    try:
                        if mp3.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mp3, None, headers)
                                mp3_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Audio (.mp3) found: ' + target_url + mp3.split('"')[0])
                                size = len(mp3_file)
                                mp3s[mp3] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mp3 = self.c_mp3 + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Audio -> [Discarding!]')
                        size = 0
                biggest_mp3 = max(list(mp3s.keys()), key=lambda x: mp3s[x]) # search/extract biggest audio (.mp3) value from dict
                if biggest_mp3:
                    biggest_files[biggest_mp3] = mp3s[biggest_mp3] # add biggest audio (.mp3) to list
        except: # if no .mp3 files were found, go to the next type
            pass
        try: # search for .mp4 files
            regex_mp4 = []
            regex_mp41 = r"<src='(.+?\.mp4)'" # search on target's results using regex with single quotation
            regex_mp4.append(regex_mp41)
            regex_mp42 = r'<src="(.+?\.mp4)"' # search on target's results using regex with double quotation
            regex_mp4.append(regex_mp42)
            #regex_mp43 = r'<src=(.+?\.mp4)' # search on target's results using regex without quotations
            #regex_mp4.append(regex_mp43)
            for regmp4 in regex_mp4:
                pattern_mp4 = re.compile(regmp4)
                mp4_links = re.findall(pattern_mp4, target_reply)
                mp4s = {}
                for mp4 in mp4_links:
                    try:
                        if mp4.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mp4, None, headers)
                                mp4_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.mp4) found: ' + target_url + mp4.split('"')[0])
                                size = len(mp4_file)
                                mp4s[mp4] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mp4 = self.c_mp4 + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_mp4 = max(list(mp4s.keys()), key=lambda x: mp4s[x]) # search/extract biggest video (.mp4) value from dict
                if biggest_mp4:
                    biggest_files[biggest_mp4] = mp4s[biggest_mp4] # add biggest video (.mp4) to list
        except: # if no .mp4 files were found, go to the next type
            pass
        try: # search for .ogg files
            regex_ogg = []
            regex_ogg1 = r"<src='(.+?\.ogg)'" # search on target's results using regex with single quotation
            regex_ogg.append(regex_ogg1)
            regex_ogg2 = r'<src="(.+?\.ogg)"' # search on target's results using regex with double quotation
            regex_ogg.append(regex_ogg2)
            #regex_ogg3 = r'<src=(.+?\.ogg)' # search on target's results using regex without quotations
            #regex_ogg.append(regex_ogg3)
            for regogg in regex_ogg:
                pattern_ogg = re.compile(regogg)
                ogg_links = re.findall(pattern_ogg, target_reply)
                oggs = {}
                for ogg in ogg_links:
                    try:
                        if ogg.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + ogg, None, headers)
                                ogg_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Audio (.ogg) found: ' + target_url + ogg.split('"')[0])
                                size = len(ogg_file)
                                oggs[ogg] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_ogg = self.c_ogg + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Audio -> [Discarding!]')
                        size = 0
                biggest_ogg = max(list(oggs.keys()), key=lambda x: oggs[x]) # search/extract biggest audio (.ogg) value from dict
                if biggest_ogg:
                    biggest_files[biggest_ogg] = oggs[biggest_ogg] # add biggest audio (.ogg) to list
        except: # if no .ogg files were found, go to the next type
            pass
        try: # search for .ogv files
            regex_ogv = []
            regex_ogv1 = r"<src='(.+?\.ogv)'" # search on target's results using regex with single quotation
            regex_ogv.append(regex_ogv1)
            regex_ogv2 = r'<src="(.+?\.ogv)"' # search on target's results using regex with double quotation
            regex_ogv.append(regex_ogv2)
            #regex_ogv3 = r'<src=(.+?\.ogv)' # search on target's results using regex without quotations
            #regex_ogv.append(regex_ogv3)
            for regogv in regex_ogv:
                pattern_ogv = re.compile(regogv)
                ogv_links = re.findall(pattern_ogv, target_reply)
                ogvs = {}
                for ogv in ogv_links:
                    try:
                        if ogv.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + ogv, None, headers)
                                ogv_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.ogv) found: ' + target_url + ogv.split('"')[0])
                                size = len(ogv_file)
                                ogvs[ogv] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_ogv = self.c_ogv + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_ogv = max(list(ogvs.keys()), key=lambda x: ogvs[x]) # search/extract biggest video (.ogv) value from dict
                if biggest_ogv:
                    biggest_files[biggest_ogv] = ogvs[biggest_ogv] # add biggest video (.ogv) to list
        except: # if no .ogv files were found, go to the next type
            pass
        try: # search for .wmv files
            regex_wmv = []
            regex_wmv1 = r"<src='(.+?\.wmv)'" # search on target's results using regex with single quotation
            regex_wmv.append(regex_wmv1)
            regex_wmv2 = r'<src="(.+?\.wmv)"' # search on target's results using regex with double quotation
            regex_wmv.append(regex_wmv2)
            #regex_wmv3 = r'<src=(.+?\.wmv)' # search on target's results using regex without quotations
            #regex_wmv.append(regex_wmv3)
            for regwmv in regex_wmv:
                pattern_wmv = re.compile(regwmv)
                wmv_links = re.findall(pattern_wmv, target_reply)
                wmvs = {}
                for wmv in wmv_links:
                    try:
                        if wmv.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + wmv, None, headers)
                                wmv_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.wmv) found: ' + target_url + wmv.split('"')[0])
                                size = len(wmv_file)
                                wmvs[wmv] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_wmv = self.c_wmv + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                biggest_wmv = max(list(wmvs.keys()), key=lambda x: wmvs[x]) # search/extract biggest video (.wmv) value from dict
                if biggest_wmv:
                    biggest_files[biggest_wmv] = wmvs[biggest_wmv] # add biggest video (.wmv) to list
        except: # if no .wmv files were found, go to the next type
            pass
        try: # search for .css files
            regex_css = []
            regex_css1 = r"href='(.+?\.css[^']*)'" # search on target's results using regex with single quotation
            regex_css.append(regex_css1)
            regex_css2 = r'href="(.+?\.css[^"]*)"' # search on target's results using regex with double quotation
            regex_css.append(regex_css2)
            #regex_css3 = r"href=(.+?\.css[^']*)" # search on target's results using regex without quotations
            #regex_css.append(regex_css3)
            for regcss in regex_css:
                pattern_css = re.compile(regcss)
                css_links = re.findall(pattern_css, target_reply)
                csss = {}
                for css in css_links:
                    try:
                        if css.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            if css.startswith("//"):
                                size = 0
                            elif "http://" in css or "https://" in css:
                                size = 0
                            else:
                                if css.startswith('/'):
                                    css = css.replace("/", "", 1)
                                try:
                                    if self.ufonet.options.proxy: # set proxy
                                        self.proxy_transport(self.ufonet.options.proxy)
                                    req = urllib.request.Request(target_url + css, None, headers)
                                    css_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                    print('+Style (.css) found: ' + target_url + css.split('"')[0])
                                    size = len(css_file)
                                    csss[css] = int(size)
                                    print('(Size: ' + str(size) + ' Bytes)')
                                    self.c_css = self.c_css + 1
                                    print('-'*12)
                                except:
                                    size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Style -> [Discarding!]')
                        size = 0
                biggest_css = max(list(csss.keys()), key=lambda x: csss[x]) # search/extract biggest style (.css) value from dict
                if biggest_css:
                    biggest_files[biggest_css] = csss[biggest_css] # add biggest style (.css) to list
        except: # if no .css files were found, go to the next type
            pass
        try: # search for .js files
            regex_js = []
            regex_js1 = r"src='(.+?\.js[^']*)'" # search on target's results using regex with single quotation
            regex_js.append(regex_js1)
            regex_js2 = r'src="(.+?\.js[^"]*)"' # search on target's results using regex with double quotation
            regex_js.append(regex_js2)
            #regex_js3 = r"src=(.+?\.js[^']*)" # search on target's results using regex without quotations
            #regex_js.append(regex_js3)
            for regjs in regex_js:
                pattern_js = re.compile(regjs)
                js_links = re.findall(pattern_js, target_reply)
                jss = {}
                for js in js_links:
                    try:
                        if js.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            if js.startswith("//"):
                                size = 0
                            elif "http://" in js or "https://" in js:
                                size = 0
                            else:
                                if js.startswith('/'):
                                    js = js.replace("/", "", 1)
                                print('+Script (.js) found: ' + target_url + js.split('"')[0])
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + js, None, headers)
                                js_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                size = len(js_file)
                                jss[js] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_js = self.c_js + 1
                                print('-'*12)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Script -> [Discarding!]')
                        size = 0
                biggest_js = max(list(jss.keys()), key=lambda x: jss[x]) # search/extract biggest script (.js) value from dict
                if biggest_js:
                    biggest_files[biggest_js] = jss[biggest_js] # add biggest script (.js) to list
        except: # if no .js files were found, go to the next type
            pass
        try: # search for .xml files
            regex_xml = []
            regex_xml1 = r"href='(.+?\.xml)'" # search on target's results using regex with single quotation
            regex_xml.append(regex_xml1)
            regex_xml2 = r'href="(.+?\.xml)"' # search on target's results using regex with double quotation
            regex_xml.append(regex_xml2)
            #regex_xml3 = r'href=(.+?\.xml)' # search on target's results using regex without quotations
            #regex_xml.append(regex_xml3)
            for regxml in regex_xml:
                pattern_xml = re.compile(regxml)
                xml_links = re.findall(pattern_xml, target_reply)
                xmls = {}
                for xml in xml_links:
                    try:
                        if xml.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + xml, None, headers)
                                xml_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                print('+Script (.xml) found: ' + target_url + xml.split('"')[0])
                                size = len(xml_file)
                                xmls[xml] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_xml = self.c_xml + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Script -> [Discarding!]')
                        size = 0
                biggest_xml = max(list(xmls.keys()), key=lambda x: xmls[x]) # search/extract biggest script (.xml) value from dict
                if biggest_xml:
                    biggest_files[biggest_xml] = xmls[biggest_xml] # add biggest script (.xml) to list
        except: # if no .xml files were found, go to the next type
            pass
        try: # search for .php files
            regex_php = []
            regex_php1 = r"href='(.+?\.php)'" # search on target's results using regex with single quotation
            regex_php.append(regex_php1)
            regex_php2 = r'href="(.+?\.php)"' # search on target's results using regex with double quotation
            regex_php.append(regex_php2)
            #regex_php3 = r'href=(.+?\.php)' # search on target's results using regex without quotations
            #regex_php.append(regex_php3)
            for regphp in regex_php:
                pattern_php = re.compile(regphp)
                php_links = re.findall(pattern_php, target_reply)
                phps = {}
                for php in php_links:
                    try:
                        if php.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + php, None, headers)
                                php_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                print('+Webpage (.php) found: ' + target_url + php.split('"')[0])
                                size = len(php_file)
                                phps[php] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_php = self.c_php + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                biggest_php = max(list(phps.keys()), key=lambda x: phps[x]) # search/extract biggest file (.php) value from dict
                if biggest_php:
                    biggest_files[biggest_php] = phps[biggest_php] # add biggest file (.php) to list
        except: # if no .php files were found, go to the next type
            pass
        try: # search for .html files
            regex_html = []
            regex_html1 = r"href='(.+?\.html)'" # search on target's results using regex with single quotation
            regex_html.append(regex_html1)
            regex_html2 = r'href="(.+?\.html)"' # search on target's results using regex with double quotation
            regex_html.append(regex_html2)
            #regex_html3 = r'href=(.+?\.html)' # search on target's results using regex without quotations
            #regex_html.append(regex_html3)
            for reghtml in regex_html:
                pattern_html = re.compile(reghtml)
                html_links = re.findall(pattern_html, target_reply)
                htmls = {}
                for html in html_links:
                    try:
                        if html.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + html, None, headers)
                                html_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                print('+Webpage (.html) found: ' + target_url + html.split('"')[0])
                                size = len(html_file)
                                htmls[html] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_html = self.c_html + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                biggest_html = max(list(htmls.keys()), key=lambda x: htmls[x]) # search/extract biggest file (.html) value from dict
                if biggest_html:
                    biggest_files[biggest_html] = htmls[biggest_html] # add biggest file (.html) to list
        except: # if no .html files were found, go to the next type
            pass
        try: # search for .jsp files
            regex_jsp = []
            regex_jsp1 = r"href='(.+?\.jsp)'" # search on target's results using regex with single quotation
            regex_jsp.append(regex_jsp1)
            regex_jsp2 = r'href="(.+?\.jsp)"' # search on target's results using regex with double quotation
            regex_jsp.append(regex_jsp2)
            #regex_jsp3 = r'href=(.+?\.jsp)' # search on target's results using regex without quotations
            #regex_jsp.append(regex_jsp3)
            for regjsp in regex_jsp:
                pattern_jsp = re.compile(regjsp)
                jsp_links = re.findall(pattern_jsp, target_reply)
                jsps = {}
                for jsp in jsp_links:
                    try:
                        if jsp.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + jsp, None, headers)
                                jsp_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                print('+Webpage (.jsp) found: ' + target_url + jsp.split('"')[0])
                                size = len(jsp_file)
                                jsps[jsp] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_jsp = self.c_jsp + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                biggest_jsp = max(list(jsps.keys()), key=lambda x: jsps[x]) # search/extract biggest file (.jsp) value from dict
                if biggest_jsp:
                    biggest_files[biggest_jsp] = jsps[biggest_jsp] # add biggest file (.jsp) to list
        except: # if no .jsp files were found, go to the next type
            pass
        try: # search for .asp files
            regex_asp = []
            regex_asp1 = r"href='(.+?\.asp)'" # search on target's results using regex with single quotation
            regex_asp.append(regex_asp1)
            regex_asp2 = r'href="(.+?\.asp)"' # search on target's results using regex with double quotation
            regex_asp.append(regex_asp2)
            #regex_asp3 = r'href=(.+?\.asp)' # search on target's results using regex without quotations
            #regex_asp.append(regex_asp3)
            for regasp in regex_asp:
                pattern_asp = re.compile(regasp)
                asp_links = re.findall(pattern_asp, target_reply)
                asps = {}
                for asp in asp_links:
                    try:
                        if asp.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + asp, None, headers)
                                asp_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                print('+Webpage (.asp) found: ' + target_url + asp.split('"')[0])
                                size = len(asp_file)
                                asps[asp] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_asp = self.c_asp + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                biggest_asp = max(list(asps.keys()), key=lambda x: asps[x]) # search/extract biggest file (.asp) value from dict
                if biggest_asp:
                    biggest_files[biggest_asp] = asps[biggest_asp] # add biggest file (.asp) to list
        except: # if no .asp files were found, go to the next type
            pass
        try: # search for .txt files
            regex_txt = []
            regex_txt1 = r"href='(.+?\.txt)'" # search on target's results using regex with single quotation
            regex_txt.append(regex_txt1)
            regex_txt2 = r'href="(.+?\.txt)"' # search on target's results using regex with double quotation
            regex_txt.append(regex_txt2)
            #regex_txt3 = r'href=(.+?\.txt)' # search on target's results using regex without quotations
            #regex_txt.append(regex_txt3)
            for regtxt in regex_txt:
                pattern_txt = re.compile(regtxt)
                txt_links = re.findall(pattern_txt, target_reply)
                txts = {}
                for txt in txt_links:
                    try:
                        if txt.startswith('http'):
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + txt, None, headers)
                                txt_file = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
                                print('+File (.txt) found: ' + target_url + txt.split('"')[0])
                                size = len(txt_file)
                                txts[txt] = int(size)
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_txt = self.c_txt + 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Text file -> [Discarding!]')
                        size = 0
                biggest_txt = max(list(txts.keys()), key=lambda x: txts[x]) # search/extract biggest file (.txt) value from dict
                if biggest_txt:
                    biggest_files[biggest_txt] = txts[biggest_txt] # add biggest file (.txt) to list
        except: # if no .txt files were found, go to the next type
            pass
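        # summary: print the per-type counters, then report the single biggest
        # object found on the target (external links are dismissed first)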
        print("\n" + '='*80)
        total_objects = self.c_images + self.c_mov + self.c_webm + self.c_avi + self.c_swf + self.c_mpg + self.c_mpeg + self.c_mp3 + self.c_mp4 + self.c_ogg + self.c_ogv + self.c_wmv + self.c_css + self.c_js + self.c_xml + self.c_php + self.c_html + self.c_jsp + self.c_asp + self.c_txt
        print("Total objects found:", total_objects)
        print('-'*20)
        print("images:", self.c_images)
        print(".mov  :", self.c_mov)
        print(".webm :", self.c_webm)
        print(".avi  :", self.c_avi)
        print(".swf  :", self.c_swf)
        print(".mpg  :", self.c_mpg)
        print(".mpeg :", self.c_mpeg)
        print(".mp3  :", self.c_mp3)
        print(".mp4  :", self.c_mp4)
        print(".ogg  :", self.c_ogg)
        print(".ogv  :", self.c_ogv)
        print(".wmv  :", self.c_wmv)
        print(".css  :", self.c_css)
        print(".js   :", self.c_js)
        print(".xml  :", self.c_xml)
        print(".php  :", self.c_php)
        print(".html :", self.c_html)
        print(".jsp  :", self.c_jsp)
        print(".asp  :", self.c_asp)
        print(".txt  :", self.c_txt)
        print('-'*20)
        print('='*80)
        if biggest_files == {}:
            print("\n[Info] [AI] No links found on target! -> [Exiting!]\n\n")
            print('='*80 + '\n')
            return
        biggest_file_on_target = max(list(biggest_files.keys()), key=lambda x: biggest_files[x]) # search/extract biggest file value from dict
        target_host = urlparse(target)
        target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
        if biggest_file_on_target.startswith('http'): # used for absolute links
            for url, size in list(biggest_files.items()): # review all dict values
                if url.startswith('http'):
                    if target_url not in url: # extract/dismiss external links
                        del biggest_files[url] # remove value from dict
            biggest_file_on_target = max(list(biggest_files.keys()), key=lambda x: biggest_files[x]) # extract new value
            print('=Biggest File: ' + biggest_file_on_target)
        else: # used for relative links
            if not target_url.endswith('/'): # add "/" to end of target
                target_url = target_url + "/"
            print('=Biggest File: ' + target_url + biggest_file_on_target)
        print('='*80 + '\n')
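
# -----------------------------------------------------------------------------
# Minimal standalone usage sketch (assumption: not part of the original module;
# the shim names below are hypothetical). Inspector only needs a UFONet-like
# object exposing .options.proxy, .agents, .referer and extract_proxy():
#
#   class _Options(object):
#       proxy = None
#
#   class _UFONetShim(object):
#       options = _Options()
#       agents = ["Mozilla/5.0 (X11; Linux x86_64)"]
#       referer = "http://127.0.0.1/"
#       user_agent = agents[0]
#       def extract_proxy(self, proxy):
#           return proxy  # UFONet normally parses proxy settings here
#
#   Inspector(_UFONetShim()).inspecting("http://target.com/")
# -----------------------------------------------------------------------------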