GitHub Repository: sqlmapproject/sqlmap
Path: blob/master/thirdparty/keepalive/keepalive.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
#   Free Software Foundation, Inc.,
#   59 Temple Place, Suite 330,
#   Boston, MA 02111-1307 USA

# This file was part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
# Copyright 2015 Sergio Fernández
"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
25
26
>>> import urllib2
27
>>> from keepalive import HTTPHandler
28
>>> keepalive_handler = HTTPHandler()
29
>>> opener = _urllib.request.build_opener(keepalive_handler)
30
>>> _urllib.request.install_opener(opener)
31
>>>
32
>>> fo = _urllib.request.urlopen('http://www.python.org')

If a connection to a given host is requested, and all of the existing
connections are still in use, another connection will be opened.  If
the handler tries to use an existing connection but it fails in some
way, it will be closed and removed from the pool.

To remove the handler, simply re-run build_opener with no arguments, and
install that opener.
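
For example (a sketch; build_opener() with no arguments yields urllib's
default handlers, without this module's keepalive support):

>>> _urllib.request.install_opener(_urllib.request.build_opener())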

You can explicitly close connections by using the close_connection()
method of the returned file-like object (described below) or you can
use the handler methods:

  close_connection(host)
  close_all()
  open_connections()

NOTE: using the close_connection and close_all methods of the handler
should be done with care when using multiple threads.
  * there is nothing that prevents another thread from creating new
    connections immediately after connections are closed
  * no checks are done to prevent in-use connections from being closed

>>> keepalive_handler.close_all()

EXTRA ATTRIBUTES AND METHODS

Upon a status of 200, the object returned has a few additional
attributes and methods, which should not be used if you want to
remain consistent with the normal urllib2-returned objects:

  close_connection() - close the connection to the host
  readlines()        - you know, readlines()
  status             - the return status (i.e. 404)
  reason             - English translation of status (i.e. 'File not found')

If you want the best of both worlds, use this inside an
AttributeError-catching try block:

>>> try: status = fo.status
... except AttributeError: status = None

Unfortunately, these are ONLY there if status == 200, so it's not
easy to distinguish between non-200 responses.  The reason is that
urllib2 tries to do clever things with error codes 301, 302, 401,
and 407, and it wraps the object upon return.

For python versions earlier than 2.4, you can avoid this fancy error
handling by setting the module-level global HANDLE_ERRORS to zero.
You see, prior to 2.4, it's the HTTP Handler's job to determine what
to handle specially, and what to just pass up.  HANDLE_ERRORS == 0
means "pass everything up".  In python 2.4, however, this job no
longer belongs to the HTTP Handler and is now done by a NEW handler,
HTTPErrorProcessor.  Here's the bottom line:

  python version < 2.4
      HANDLE_ERRORS == 1  (default) pass up 200, treat the rest as
                          errors
      HANDLE_ERRORS == 0  pass everything up, error processing is
                          left to the calling code
  python version >= 2.4
      HANDLE_ERRORS == 1  pass up 200, treat the rest as errors
      HANDLE_ERRORS == 0  (default) pass everything up, let the
                          other handlers (specifically,
                          HTTPErrorProcessor) decide what to do

In practice, setting the variable either way makes little difference
in python 2.4, so for the most consistent behavior across versions,
you probably just want to use the defaults, which will give you
exceptions on errors.
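
For example, to always get the raw response object back yourself,
regardless of python version (a sketch; set this before opening any URLs):

>>> import keepalive
>>> keepalive.HANDLE_ERRORS = 0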

"""

from __future__ import print_function

try:
    from thirdparty.six.moves import http_client as _http_client
    from thirdparty.six.moves import range as _range
    from thirdparty.six.moves import urllib as _urllib
except ImportError:
    from six.moves import http_client as _http_client
    from six.moves import range as _range
    from six.moves import urllib as _urllib

import socket
import sys
import threading

DEBUG = None

if sys.version_info < (2, 4):
    HANDLE_ERRORS = 1
else:
    HANDLE_ERRORS = 0

class ConnectionManager:
    """
    The connection manager must be able to:
      * keep track of all existing connections
    A usage sketch follows the class definition.
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {}   # map hosts to a list of connections
        self._connmap = {}   # map connections to host
        self._readymap = {}  # map connection to ready state

    def add(self, host, connection, ready):
        self._lock.acquire()
        try:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        conn = None
        try:
            self._lock.acquire()
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap.get(c):
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
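
# A minimal usage sketch of the manager above (KeepAliveHandler below is the
# real consumer; 'conn' stands for any HTTPConnection-like object):
#
#   cm = ConnectionManager()
#   cm.add('example.com:80', conn, 0)            # register a new, busy connection
#   cm.set_ready(conn, 1)                        # request done; mark it reusable
#   conn = cm.get_ready_conn('example.com:80')   # claim it again (marks it busy)
#   cm.remove(conn)                              # drop it when it goes bad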

class KeepAliveHandler:
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def do_open(self, req):
        host = req.host
        if not host:
            raise _urllib.error.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = self._get_connection(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)", host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, _http_client.HTTPException) as err:
            raise _urllib.error.URLError(err)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            if DEBUG:
                DEBUG.info('server will close connection, discarding')
            self._cm.remove(h)

        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r, r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This does NOT close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, _http_client.HTTPException):
            r = None
        except:
            # adding this block just in case we've missed something; we
            # will still raise the exception, but let's try to close the
            # connection and remove it first.  We previously got into a
            # nasty loop where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the same
            # exception was raised, etc.  The tradeoff is that it's now
            # possible this call will raise a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)", host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        try:
            # py3's Request exposes 'selector'; older urllib2 used get_selector()
            selector = req.selector if hasattr(req, 'selector') else req.get_selector()
            if req.data:
                data = req.data
                h.putrequest(req.get_method() or 'POST', selector,
                             skip_host=req.has_header("Host"),
                             skip_accept_encoding=req.has_header("Accept-encoding"))
                if 'Content-type' not in req.headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'Content-length' not in req.headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest(req.get_method() or 'GET', selector,
                             skip_host=req.has_header("Host"),
                             skip_accept_encoding=req.has_header("Accept-encoding"))
        except (socket.error, _http_client.HTTPException) as err:
            raise _urllib.error.URLError(err)

        if 'Connection' not in req.headers:
            req.headers['Connection'] = 'keep-alive'

        for args in self.parent.addheaders:
            if args[0] not in req.headers:
                h.putheader(*args)
        for k, v in req.headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.data:
            h.send(data)

    def _get_connection(self, host):
        raise NotImplementedError()

class HTTPHandler(KeepAliveHandler, _urllib.request.HTTPHandler):
    def __init__(self):
        KeepAliveHandler.__init__(self)

    def http_open(self, req):
        return self.do_open(req)

    def _get_connection(self, host):
        return HTTPConnection(host)

class HTTPSHandler(KeepAliveHandler, _urllib.request.HTTPSHandler):
    def __init__(self, ssl_factory=None):
        KeepAliveHandler.__init__(self)
        if not ssl_factory:
            try:
                import sslfactory
                ssl_factory = sslfactory.get_factory()
            except ImportError:
                pass
        self._ssl_factory = ssl_factory

    def https_open(self, req):
        return self.do_open(req)

    def _get_connection(self, host):
        try:
            return self._ssl_factory.get_https_connection(host)
        except AttributeError:
            return HTTPSConnection(host)

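# A minimal sketch of wiring both handlers into urllib (mirrors the module
# docstring; any http:// or https:// address works as 'url'):
#
#   opener = _urllib.request.build_opener(HTTPHandler(), HTTPSHandler())
#   _urllib.request.install_opener(opener)
#   fo = _urllib.request.urlopen(url)
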
class HTTPResponse(_http_client.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() method
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        if method:  # the httplib in python 2.3 uses the method arg
            _http_client.HTTPResponse.__init__(self, sock, debuglevel, method)
        else:  # 2.2 doesn't
            _http_client.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._method = method
        self._rbuf = b""
        self._rbufsize = 8096
        self._handler = None     # inserted by the handler later
        self._host = None        # (same)
        self._url = None         # (same)
        self._connection = None  # (same)

    _raw_read = _http_client.HTTPResponse.read

    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    # Note: Patch for Python3 (otherwise, connections won't be reusable)
    def _close_conn(self):
        self.close()

    def close_connection(self):
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = b""
        return s

    def readline(self, limit=-1):
        data = b""
        i = self._rbuf.find(b'\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new:
                break
            i = new.find(b'\n')
            if i >= 0:
                i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0:
            i = len(self._rbuf)
        else:
            i = i + 1
        if 0 <= limit < len(self._rbuf):
            i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint=0):
        total = 0
        lines = []
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines

class HTTPConnection(_http_client.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse

class HTTPSConnection(_http_client.HTTPSConnection):
    response_class = HTTPResponse

#########################################################################
#####   TEST FUNCTIONS
#########################################################################

def error_handler(url):
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = _urllib.request.build_opener(keepalive_handler)
    _urllib.request.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    for i in (0, 1):
        print("  fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i))
        HANDLE_ERRORS = i
        try:
            fo = _urllib.request.urlopen(url)
            foo = fo.read()
            fo.close()
            try:
                status, reason = fo.status, fo.reason
            except AttributeError:
                status, reason = None, None
        except IOError as e:
            print("  EXCEPTION: %s" % e)
            raise
        else:
            print("  status = %s, reason = %s" % (status, reason))
    HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print("open connections:", hosts)
    keepalive_handler.close_all()

def continuity(url):
    from hashlib import md5
    fmt = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = _urllib.request.build_opener()
    _urllib.request.install_opener(opener)
    fo = _urllib.request.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(fmt % ('normal urllib', m.hexdigest()))

    # now install the keepalive handler and try again
    opener = _urllib.request.build_opener(HTTPHandler())
    _urllib.request.install_opener(opener)

    fo = _urllib.request.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(fmt % ('keepalive read', m.hexdigest()))

    fo = _urllib.request.urlopen(url)
    foo = b''
    while True:
        f = fo.readline()
        if f:
            foo = foo + f
        else:
            break
    fo.close()
    m = md5(foo)
    print(fmt % ('keepalive readline', m.hexdigest()))

def comp(N, url):
    print('  making %i connections to:\n  %s' % (N, url))

    sys.stdout.write('  first using the normal urllib handlers')
    # first use normal opener
    opener = _urllib.request.build_opener()
    _urllib.request.install_opener(opener)
    t1 = fetch(N, url)
    print('  TIME: %.3f s' % t1)

    sys.stdout.write('  now using the keepalive handler ')
    # now install the keepalive handler and try again
    opener = _urllib.request.build_opener(HTTPHandler())
    _urllib.request.install_opener(opener)
    t2 = fetch(N, url)
    print('  TIME: %.3f s' % t2)
    print('  improvement factor: %.2f' % (t1 / t2, ))

def fetch(N, url, delay=0):
    import time
    lens = []
    starttime = time.time()
    for i in _range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = _urllib.request.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    j = 0
    for i in lens[1:]:
        j = j + 1
        if i != lens[0]:
            print("WARNING: inconsistent length on read %i: %i" % (j, i))

    return diff

def test_timeout(url):
    global DEBUG
    import time
    dbbackup = DEBUG

    class FakeLogger:
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug

    DEBUG = FakeLogger()
    print("  fetching the file to establish a connection")
    fo = _urllib.request.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print("  waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        sys.stdout.write('\r  %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print("  fetching the file a second time")
    fo = _urllib.request.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print('  data are identical')
    else:
        print('  ERROR: DATA DIFFER')

    DEBUG = dbbackup

def test(url, N=10):
    print("checking error handler (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()
    print()
    print("performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print()
    print("performing speed comparison")
    comp(N, url)
    print()
    print("performing dropped-connection check")
    test_timeout(url)

if __name__ == '__main__':
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)