Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
1N3
GitHub Repository: 1N3/Sn1per
Path: blob/master/bin/webscreenshot.py
2960 views
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
# This file is part of webscreenshot.
5
#
6
# Copyright (C) 2018, Thomas Debize <tdebize at mail.com>
7
# All rights reserved.
8
#
9
# webscreenshot is free software: you can redistribute it and/or modify
10
# it under the terms of the GNU Lesser General Public License as published by
11
# the Free Software Foundation, either version 3 of the License, or
12
# (at your option) any later version.
13
#
14
# webscreenshot is distributed in the hope that it will be useful,
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
# GNU Lesser General Public License for more details.
18
#
19
# You should have received a copy of the GNU Lesser General Public License
20
# along with webscreenshot. If not, see <http://www.gnu.org/licenses/>.
21
22
import re
23
import os
24
import sys
25
import subprocess
26
import datetime
27
import time
28
import signal
29
import multiprocessing
30
import itertools
31
import shlex
32
import logging
33
import errno
34
35
# Script version
VERSION = '2.2.1'

# OptionParser imports
from optparse import OptionParser
from optparse import OptionGroup

# Options definition
# Numeric options declare type='int' so that a bad value is rejected by the parser
# with a usage message instead of failing later on an int() conversion.
parser = OptionParser(usage="usage: %prog [options] URL")

main_grp = OptionGroup(parser, 'Main parameters')
main_grp.add_option('-i', '--input-file', help = '<INPUT_FILE>: text file containing the target list. Ex: list.txt', nargs = 1)
main_grp.add_option('-o', '--output-directory', help = '<OUTPUT_DIRECTORY> (optional): screenshots output directory (default \'./screenshots/\')', nargs = 1)
main_grp.add_option('-r', '--renderer', help = '<RENDERER> (optional): renderer to use among \'phantomjs\' (legacy but best results), \'chrome\', \'chromium\' (version > 57) (default \'phantomjs\')', choices = ['phantomjs', 'chrome', 'chromium'], default = 'phantomjs', nargs = 1)
main_grp.add_option('-w', '--workers', help = '<WORKERS> (optional): number of parallel execution workers (default 2)', type = 'int', default = 2, nargs = 1)
main_grp.add_option('-v', '--verbosity', help = '<VERBOSITY> (optional): verbosity level, repeat it to increase the level { -v INFO, -vv DEBUG } (default verbosity ERROR)', action = 'count', default = 0)

proc_grp = OptionGroup(parser, 'Input processing parameters')
proc_grp.add_option('-p', '--port', help = '<PORT> (optional): use the specified port for each target in the input list. Ex: -p 80', type = 'int', nargs = 1)
proc_grp.add_option('-s', '--ssl', help = '<SSL> (optional): enforce ssl for every connection', action = 'store_true', default = False)
proc_grp.add_option('-m', '--multiprotocol', help = '<MULTIPROTOCOL> (optional): perform screenshots over HTTP and HTTPS for each target', action = 'store_true', default = False)

http_grp = OptionGroup(parser, 'HTTP parameters')
http_grp.add_option('-c', '--cookie', help = '<COOKIE_STRING> (optional): cookie string to add. Ex: -c "JSESSIONID=1234; YOLO=SWAG"', nargs = 1)
http_grp.add_option('-a', '--header', help = '<HEADER> (optional): custom or additional header. Repeat this option for every header. Ex: -a "Host: localhost" -a "Foo: bar"', action = 'append')

http_grp.add_option('-u', '--http-username', help = '<HTTP_USERNAME> (optional): specify a username for HTTP Basic Authentication.')
http_grp.add_option('-b', '--http-password', help = '<HTTP_PASSWORD> (optional): specify a password for HTTP Basic Authentication.')

conn_grp = OptionGroup(parser, 'Connection parameters')
conn_grp.add_option('-P', '--proxy', help = '<PROXY> (optional): specify a proxy. Ex: -P http://proxy.company.com:8080')
conn_grp.add_option('-A', '--proxy-auth', help = '<PROXY_AUTH> (optional): provides authentication information for the proxy. Ex: -A user:password')
conn_grp.add_option('-T', '--proxy-type', help = '<PROXY_TYPE> (optional): specifies the proxy type, "http" (default), "none" (disable completely), or "socks5". Ex: -T socks5')
conn_grp.add_option('-t', '--timeout', help = '<TIMEOUT> (optional): renderer execution timeout in seconds (default 30 sec)', type = 'int', default = 30, nargs = 1)

# Register the groups through the public API rather than by mutating parser.option_groups
for option_group in (main_grp, proc_grp, http_grp, conn_grp):
    parser.add_option_group(option_group)
71
72
# Renderer binaries, hoping to find them in a $PATH directory
## Feel free to change them to your own full-path location
PHANTOMJS_BIN = 'phantomjs'
CHROME_BIN = 'google-chrome'
CHROMIUM_BIN = 'chromium'

# Capture script handed to PhantomJS, looked up next to this very file
WEBSCREENSHOT_JS = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), './webscreenshot.js'))
# Default output directory; os.getcwdu() only exists on Python 2, fall back on os.getcwd() elsewhere
SCREENSHOTS_DIRECTORY = os.path.abspath(os.path.join(getattr(os, 'getcwdu', os.getcwd)(), './screenshots/'))
80
81
# Logger definition
## Verbosity counter (number of -v flags) -> logging level name
LOGLEVELS = dict(enumerate(('ERROR', 'INFO', 'DEBUG')))

## One stdout handler, also attached to the per-URL loggers created in craft_cmd()
logger_output = logging.StreamHandler(sys.stdout)
logger_output.setFormatter(
    logging.Formatter('[%(levelname)s][%(name)s] %(message)s')
)

logger_gen = logging.getLogger("General")
logger_gen.addHandler(logger_output)

# Macros
## Values returned by shell_exec()
SHELL_EXECUTION_OK, SHELL_EXECUTION_ERROR = 0, -1
## Renderer exit status that shell_exec() reports as an HTTP authentication request
PHANTOMJS_HTTP_AUTH_ERROR_CODE = 2
93
94
# Handy patterns
## Raw strings everywhere, so that regex escapes such as \d or \. are never read as string escapes
p_ipv4_elementary = r'(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})'
p_domain = r'[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,6}'
p_port = r'\d{0,5}'
p_resource = r'(?:/(?P<res>.*))?'

## protocol://host[:port][/resource]
full_uri_domain = re.compile(r'^(?P<protocol>http(?:|s))://(?P<host>%s|%s)(?::(?P<port>%s))?%s$' % (p_domain, p_ipv4_elementary, p_port, p_resource))

## domain:port[/resource] and domain[/resource]
fqdn_and_port = re.compile(r'^(?P<host>%s):(?P<port>%s)%s$' % (p_domain, p_port, p_resource))
fqdn_only = re.compile(r'^(?P<host>%s)%s$' % (p_domain, p_resource))

## ipv4:port[/resource] and ipv4[/resource]
## Anchored with '$' like every sibling pattern: trailing garbage or a port longer
## than 5 digits must not be silently truncated to a shorter, different target.
ipv4_and_port = re.compile(r'^(?P<host>%s):(?P<port>%s)%s$' % (p_ipv4_elementary, p_port, p_resource))
ipv4_only = re.compile(r'^(?P<host>%s)%s$' % (p_ipv4_elementary, p_resource))

## "host<whitespace>port" entries
entry_from_csv = re.compile(r'^(?P<host>%s|%s)\s+(?P<port>\d+)$' % (p_domain, p_ipv4_elementary))
109
110
# Handy functions
111
def init_worker():
    """
    Pool worker initializer: make the current process ignore SIGINT,
    so that an interruption is only dealt with by the process that installed a handler for it
    """
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
116
117
def kill_em_all(signal, frame):
    """
    SIGINT handler: report the interruption on the general logger, then leave with exit status 0
    Both arguments are imposed by the signal handler calling convention and are not used
    """
    logger_gen.info('CTRL-C received, exiting')
    # sys.exit(0) is nothing more than raising SystemExit(0)
    raise SystemExit(0)
123
124
def shell_exec(url, command, options):
    """
    Execute a shell command following a timeout
    Taken from http://howto.pui.ch/post/37471155682/set-timeout-for-a-shell-command-in-python

    url: target URL, only used to name the logger of this execution
    command: whole command line as one string, tokenized with shlex and run without a shell
    options: parsed options, 'log_level' and 'timeout' (in seconds) are read

    Return SHELL_EXECUTION_OK when the command ends with a zero exit status before the timeout,
    SHELL_EXECUTION_ERROR in every other case (timeout, non-zero exit status, command that could not be run)
    """
    logger_url = logging.getLogger("%s" % url)
    logger_url.setLevel(options.log_level)

    timeout = int(options.timeout)
    start = time.time()

    try:
        # The renderer output is never read here: discard it instead of piping it,
        # a child blocked on a full pipe could only ever end through the timeout
        with open(os.devnull, 'w') as devnull:
            p = subprocess.Popen(shlex.split(command), shell=False, stdout=devnull, stderr=devnull)

            # binaries timeout
            while p.poll() is None:
                time.sleep(0.1)
                # wall-clock difference, timedelta.seconds would only be one component of the elapsed time
                if time.time() - start > timeout:
                    logger_url.debug("Shell command PID %s reached the timeout, killing it now" % p.pid)
                    logger_url.error("Screenshot somehow failed\n")

                    # SIGKILL on POSIX, process termination on Windows
                    p.kill()
                    # reap the child so that it does not linger as a zombie
                    p.wait()

                    return SHELL_EXECUTION_ERROR

        retval = p.returncode
        if retval != SHELL_EXECUTION_OK:
            if retval == PHANTOMJS_HTTP_AUTH_ERROR_CODE:
                # HTTP Authentication request
                logger_url.error("HTTP Authentication requested, try to pass credentials with -u and -b options")
            else:
                # Phantomjs general error
                logger_url.error("Shell command PID %s returned an abnormal error code: '%s'" % (p.pid,retval))
                logger_url.error("Screenshot somehow failed\n")

            return SHELL_EXECUTION_ERROR

        # Phantomjs ok
        logger_url.debug("Shell command PID %s ended normally" % p.pid)
        logger_url.info("Screenshot OK\n")
        return SHELL_EXECUTION_OK

    except Exception as e:
        # Not every exception carries an errno (e.g. a ValueError raised by shlex.split)
        if getattr(e, 'errno', None) == errno.ENOENT:
            logger_url.error('renderer binary could not have been found in your current PATH environment variable, exiting')
        else:
            logger_gen.error('Unknown error: %s, exiting' % e )
        return SHELL_EXECUTION_ERROR
179
180
def filter_bad_filename_chars(filename):
    """
    Filter bad chars for any filename

    Every 'http://' and 'https://' occurrence is removed, then each character which is not
    a word character, '-', '_', '.' or a space is replaced by the '-port' marker.
    Ex: 'http://example.com:80' becomes 'example.com-port80'
    """
    # Before, just avoid escaping the classic '://' pattern
    filename = filename.replace('http://', '')
    filename = filename.replace('https://', '')

    # Raw string: \w, \- and \. are regex escapes, not string escapes
    return re.sub(r'[^\w\-_\. ]', '-port', filename)
192
193
def extract_all_matched_named_groups(regex, match):
    """
    Return a dict of all the named groups of 'regex' which took part in 'match'.
    Named groups that did not participate in the match (value None) are left out.

    >>> rx = re.compile(r'^(?P<host>[0-9.]+)(?::(?P<port>[0-9]+))?$')
    >>> extract_all_matched_named_groups(rx, rx.match('8.8.8.8'))
    {'host': '8.8.8.8'}
    """
    result = {}
    for name in regex.groupindex:
        matched_value = match.group(name)
        if matched_value is not None:
            result[name] = matched_value

    return result
209
210
def entry_format_validator(line):
    """
    Try the known entry formats against the current line, one after the other.
    Return the named groups of the first format that matches (host, port, protocol, res...),
    or None when the line matches none of them.
    """
    candidates = (
        full_uri_domain,
        fqdn_only,
        fqdn_and_port,
        ipv4_and_port,
        ipv4_only,
        entry_from_csv,
    )

    for candidate in candidates:
        found = candidate.match(line)
        if found is None:
            continue
        return extract_all_matched_named_groups(candidate, found)

    return None
226
227
def parse_targets(options, arguments):
    """
    Parse list and convert each target to valid URI with port (protocol://foobar:port)

    options: parsed options, 'input_file', 'ssl', 'port' and 'multiprotocol' are read
    arguments: positional arguments, used as the target list when no input file is given

    Return the list of formatted URIs. Entries that no known format recognizes are
    reported with a warning and skipped. With 'multiprotocol', each entry yields
    one http and one https URI.
    """
    target_list = []

    if options.input_file is not None:
        with open(options.input_file, 'rb') as fd_input:
            try:
                lines = [raw_line.decode('utf-8').strip() for raw_line in fd_input.readlines()]
            except UnicodeDecodeError:
                logger_gen.error('Your input file is not UTF-8 encoded, please encode it before using this script')
                sys.exit(0)
    else:
        lines = arguments

    for index, line in enumerate(lines, start=1):
        matches = entry_format_validator(line)

        # skip the line if it cannot be recognized as a correct input, or if no 'host' group could be found with all the regexes
        if matches is None or 'host' not in matches:
            logger_gen.warning("Line %s '%s' could not have been recognized as a correct input" % (index, line))
            continue

        host = matches['host']

        # The port pattern also matches an empty string ('example.com:'):
        # treat it as "no port supplied" instead of crashing on int('')
        matched_port = int(matches['port']) if matches.get('port') else None

        # Protocol is 'http' by default, unless ssl is forced
        if options.ssl:
            protocol = 'https'
        else:
            protocol = matches.get('protocol', 'http')

        # Port is ('80' for http) or ('443' for https) by default, unless a specific port is supplied
        if options.port is not None:
            port = options.port
        elif matched_port is not None:
            port = matched_port

            # if port is 443, assume protocol is https if is not specified
            protocol = 'https' if port == 443 else protocol
        else:
            port = 443 if protocol == 'https' else 80

        # No resource URI by default; the matched value is already text, no conversion needed
        res = matches.get('res')

        # perform screenshots over HTTP and HTTPS for each target
        if options.multiprotocol:
            final_uri_http_port = matched_port if matched_port is not None else 80
            final_uri_http = '%s://%s:%s' % ('http', host, final_uri_http_port)
            target_list.append(final_uri_http)
            logger_gen.info("'%s' has been formatted as '%s' with supplied overriding options" % (line, final_uri_http))

            final_uri_https_port = matched_port if matched_port is not None else 443
            final_uri_https = '%s://%s:%s' % ('https', host, final_uri_https_port)
            target_list.append(final_uri_https)
            logger_gen.info("'%s' has been formatted as '%s' with supplied overriding options" % (line, final_uri_https))

        else:
            final_uri = '%s://%s:%s' % (protocol, host, port)
            if res is not None:
                final_uri = final_uri + '/%s' % res
            target_list.append(final_uri)

            logger_gen.info("'%s' has been formatted as '%s' with supplied overriding options" % (line, final_uri))

    return target_list
300
301
def craft_cmd(url_and_options):
    """
    Craft the correct command with url and options, then execute it

    url_and_options: (url, options) tuple, packed as a single argument for the pool workers

    Return a (execution_retval, url) tuple, execution_retval being the value returned by shell_exec()
    """
    try:
        from shlex import quote
    except ImportError:
        # Python 2 ships the same helper in the 'pipes' module
        from pipes import quote

    url, options = url_and_options

    logger_url = logging.getLogger("%s" % url)
    logger_url.addHandler(logger_output)
    logger_url.setLevel(options.log_level)

    output_filename = os.path.join(SCREENSHOTS_DIRECTORY, ('%s.jpg' % filter_bad_filename_chars(url)))

    # The command is joined into one string and tokenized again by shell_exec() with shlex:
    # every user-controlled value goes through quote() so that it always comes back as exactly
    # one argument, whatever quotes, spaces or backslashes it contains.

    # PhantomJS renderer
    if options.renderer == 'phantomjs':
        # If you ever want to add some voodoo options to the phantomjs command to be executed, that's here right below
        cmd_parameters = [ PHANTOMJS_BIN,
                           '--ignore-ssl-errors true',
                           '--ssl-protocol any',
                           '--ssl-ciphers ALL'
                         ]

        if options.proxy is not None:
            cmd_parameters.append("--proxy %s" % quote(options.proxy))
        if options.proxy_auth is not None:
            cmd_parameters.append("--proxy-auth %s" % quote(options.proxy_auth))
        if options.proxy_type is not None:
            cmd_parameters.append("--proxy-type %s" % quote(options.proxy_type))

        cmd_parameters.append(quote(WEBSCREENSHOT_JS))
        cmd_parameters.append(quote('url_capture=%s' % url))
        cmd_parameters.append(quote('output_file=%s' % output_filename))

        if options.cookie is not None:
            cmd_parameters.append(quote('header=Cookie: %s' % options.cookie.rstrip(';')))

        if options.http_username is not None:
            cmd_parameters.append(quote('http_username=%s' % options.http_username))
        if options.http_password is not None:
            cmd_parameters.append(quote('http_password=%s' % options.http_password))

        if options.header:
            for header in options.header:
                cmd_parameters.append(quote('header=%s' % header.rstrip(';')))

    # Chrome and chromium renderers
    else:
        cmd_parameters = [ CHROME_BIN ] if options.renderer == 'chrome' else [ CHROMIUM_BIN ]
        cmd_parameters += [ '--allow-running-insecure-content',
                            '--ignore-certificate-errors',
                            '--ignore-urlfetcher-cert-requests',
                            '--reduce-security-for-testing',
                            '--no-sandbox',
                            '--headless',
                            '--disable-gpu',
                            '--hide-scrollbars',
                            '--incognito',
                            quote('-screenshot=%s' % output_filename),
                            '--window-size=1200,800',
                            quote(url)
                          ]
        if options.proxy is not None:
            cmd_parameters.append(quote('--proxy-server=%s' % options.proxy))

    cmd = " ".join(cmd_parameters)

    logger_url.debug("Shell command to be executed\n'%s'\n" % cmd)

    execution_retval = shell_exec(url, cmd, options)

    return execution_retval, url
365
366
367
def take_screenshot(url_list, options):
    """
    Launch the screenshot workers
    Thanks http://noswap.com/blog/python-multiprocessing-keyboardinterrupt

    url_list: URIs to capture, each one is handed to craft_cmd() together with the options
    options: parsed options, 'workers' gives the size of the process pool

    A summary (number of URLs, successes, errors and the URLs in error) is printed on stdout.
    Return None.
    """
    screenshot_number = len(url_list)
    # print(...) with one pre-formatted string behaves the same as a statement or as a function
    print("[+] %s URLs to be screenshot" % screenshot_number)

    pool = multiprocessing.Pool(processes=int(options.workers), initializer=init_worker)

    try:
        taken_screenshots = list(pool.imap(craft_cmd, zip(url_list, itertools.repeat(options))))
        pool.close()
    except BaseException:
        # e.g. the SystemExit raised by the SIGINT handler: do not wait for the pending screenshots
        pool.terminate()
        raise
    finally:
        pool.join()

    screenshots_error_url = [url for retval, url in taken_screenshots if retval == SHELL_EXECUTION_ERROR]
    screenshots_error = len(screenshots_error_url)
    screenshots_ok = int(screenshot_number - screenshots_error)

    print("[+] %s actual URLs screenshot" % screenshots_ok)
    print("[+] %s error(s)" % screenshots_error)

    if screenshots_error != 0:
        for url in screenshots_error_url:
            print(" %s" % url)

    return None
393
394
def main():
    """
    Dat main

    Parse the command line, set up the log level and the output directory,
    build the target list and launch the screenshots.
    Invalid option combinations end the program through parser.error().
    """
    global SCREENSHOTS_DIRECTORY
    signal.signal(signal.SIGINT, kill_em_all)

    print('webscreenshot.py version %s\n' % VERSION)

    options, arguments = parser.parse_args()

    try:
        options.log_level = LOGLEVELS[options.verbosity]
    except KeyError:
        # more '-v' flags than known levels
        parser.error("Please specify a valid log level")
    logger_gen.setLevel(options.log_level)

    # Without an input file, exactly one URL is expected
    if options.input_file is None and len(arguments) != 1:
        parser.error('Please specify a valid input file or a valid URL')

    # An input file excludes any URL given on the command line
    if options.input_file is not None and arguments:
        parser.error('Please specify either an input file or an URL')

    if options.output_directory is not None:
        # os.getcwdu() only exists on Python 2, fall back on os.getcwd() elsewhere
        current_directory = getattr(os, 'getcwdu', os.getcwd)()
        SCREENSHOTS_DIRECTORY = os.path.abspath(os.path.join(current_directory, options.output_directory))

    logger_gen.debug("Options: %s\n" % options)
    if not os.path.exists(SCREENSHOTS_DIRECTORY):
        logger_gen.info("'%s' does not exist, will then be created" % SCREENSHOTS_DIRECTORY)
        os.makedirs(SCREENSHOTS_DIRECTORY)

    url_list = parse_targets(options, arguments)

    take_screenshot(url_list, options)

    return None

if __name__ == "__main__":
    main()
433