# thirdparty/keepalive/keepalive.py (bundled with the sqlmap project)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
#      Free Software Foundation, Inc.,
#      59 Temple Place, Suite 330,
#      Boston, MA  02111-1307  USA

# This file was part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
# Copyright 2015 Sergio Fernández
"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
25
26
>>> import urllib2
27
>>> from keepalive import HTTPHandler
28
>>> keepalive_handler = HTTPHandler()
29
>>> opener = _urllib.request.build_opener(keepalive_handler)
30
>>> _urllib.request.install_opener(opener)
31
>>>
32
>>> fo = _urllib.request.urlopen('http://www.python.org')
33
34
If a connection to a given host is requested, and all of the existing
35
connections are still in use, another connection will be opened. If
36
the handler tries to use an existing connection but it fails in some
37
way, it will be closed and removed from the pool.
38
39
To remove the handler, simply re-run build_opener with no arguments, and
40
install that opener.
41
42
You can explicitly close connections by using the close_connection()
43
method of the returned file-like object (described below) or you can
44
use the handler methods:
45
46
close_connection(host)
47
close_all()
48
open_connections()
49
50
NOTE: using the close_connection and close_all methods of the handler
51
should be done with care when using multiple threads.
52
* there is nothing that prevents another thread from creating new
53
connections immediately after connections are closed
54
* no checks are done to prevent in-use connections from being closed
55
56
>>> keepalive_handler.close_all()
57
58
EXTRA ATTRIBUTES AND METHODS
59
60
Upon a status of 200, the object returned has a few additional
61
attributes and methods, which should not be used if you want to
62
remain consistent with the normal urllib2-returned objects:
63
64
close_connection() - close the connection to the host
65
readlines() - you know, readlines()
66
status - the return status (ie 404)
67
reason - english translation of status (ie 'File not found')
68
69
If you want the best of both worlds, use this inside an
70
AttributeError-catching try:
71
72
>>> try: status = fo.status
73
>>> except AttributeError: status = None
74
75
Unfortunately, these are ONLY there if status == 200, so it's not
76
easy to distinguish between non-200 responses. The reason is that
77
urllib2 tries to do clever things with error codes 301, 302, 401,
78
and 407, and it wraps the object upon return.
79
80
For python versions earlier than 2.4, you can avoid this fancy error
81
handling by setting the module-level global HANDLE_ERRORS to zero.
82
You see, prior to 2.4, it's the HTTP Handler's job to determine what
83
to handle specially, and what to just pass up. HANDLE_ERRORS == 0
84
means "pass everything up". In python 2.4, however, this job no
85
longer belongs to the HTTP Handler and is now done by a NEW handler,
86
HTTPErrorProcessor. Here's the bottom line:
87
88
python version < 2.4
89
HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
90
errors
91
HANDLE_ERRORS == 0 pass everything up, error processing is
92
left to the calling code
93
python version >= 2.4
94
HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
95
HANDLE_ERRORS == 0 (default) pass everything up, let the
96
other handlers (specifically,
97
HTTPErrorProcessor) decide what to do
98
99
In practice, setting the variable either way makes little difference
100
in python 2.4, so for the most consistent behavior across versions,
101
you probably just want to use the defaults, which will give you
102
exceptions on errors.
103
104
"""
105
106
from __future__ import print_function

try:
    # sqlmap bundles six under thirdparty/; prefer that copy when present.
    from thirdparty.six.moves import http_client as _http_client
    from thirdparty.six.moves import range as _range
    from thirdparty.six.moves import urllib as _urllib
except ImportError:
    # Fall back to a system-wide six installation.
    from six.moves import http_client as _http_client
    from six.moves import range as _range
    from six.moves import urllib as _urllib

import socket
import threading

# Optional logger-like object (anything exposing info()/error()); when set,
# the handlers emit debug traces through it.
DEBUG = None

import sys
# Default error handling depends on the Python version (see the module
# docstring): before 2.4 the HTTP handler itself treated non-200 as errors.
if sys.version_info < (2, 4): HANDLE_ERRORS = 1
else: HANDLE_ERRORS = 0
class ConnectionManager:
    """
    Thread-safe registry of keep-alive connections.

    Tracks, for every host, the pooled connections and whether each one is
    currently free ("ready") to carry a new request.
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {}   # host -> list of connections
        self._connmap = {}   # connection -> host
        self._readymap = {}  # connection -> ready state

    def add(self, host, connection, ready):
        """Register *connection* for *host* with the given ready state."""
        # 'with' replaces manual acquire()/try/finally/release(); in the old
        # get_ready_conn the acquire happened *inside* the try, so a failed
        # acquire would have released an unheld lock.
        with self._lock:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Forget *connection* entirely; silently ignores unknown connections."""
        with self._lock:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                try:
                    self._hostmap[host].remove(connection)
                except ValueError:
                    pass
                # Drop the host entry once its last connection is gone.
                if not self._hostmap[host]:
                    del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Mark *connection* free (truthy) or busy; ignores unknown connections."""
        with self._lock:
            if connection in self._readymap:
                self._readymap[connection] = ready

    def get_ready_conn(self, host):
        """Return a free connection to *host*, marking it busy, or None."""
        conn = None
        with self._lock:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap.get(c):
                        self._readymap[c] = 0
                        conn = c
                        break
        return conn

    def get_all(self, host=None):
        """Return a copy of the connection list for *host*, or the whole map."""
        with self._lock:
            if host:
                return list(self._hostmap.get(host, []))
            else:
                return dict(self._hostmap)
class KeepAliveHandler:
    """Mixin implementing the keep-alive transaction machinery.

    Subclasses (HTTPHandler/HTTPSHandler below) provide _get_connection()
    and plug this into urllib's opener framework via self.parent.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        """Drop *connection* from the pool, optionally closing it first."""
        if close: connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def do_open(self, req):
        """Perform *req*, reusing a pooled connection when possible.

        Returns the (possibly error-processed) response object; raises
        urllib's URLError on socket/HTTP-level failures.
        """
        host = req.host
        if not host:
            raise _urllib.error.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            # while/else: the else runs only if no pooled connection worked
            # (including the case where none existed to begin with).
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r: break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = self._get_connection(host)
                if DEBUG: DEBUG.info("creating new connection to %s (%d)",
                                     host, id(h))
                self._start_transaction(h, req)
                r = h.getresponse()
                # added as busy (ready=0); _request_closed frees it later
                self._cm.add(host, h, 0)
        except (socket.error, _http_client.HTTPException) as err:
            raise _urllib.error.URLError(err)

        if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)

        # Respect an explicit "Connection: close" response header even when
        # http.client didn't already flag the response as will_close.
        if not r.will_close:
            try:
                headers = getattr(r, 'msg', None)
                if headers:
                    c_head = headers.get("connection")
                    if c_head and "close" in c_head.lower():
                        r.will_close = True
            except Exception:
                pass

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            if DEBUG: DEBUG.info('server will close connection, discarding')
            self._cm.remove(h)
            h.close()

        # Bookkeeping consumed by HTTPResponse.close()/close_connection().
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            # delegate non-200 handling to the opener's error machinery
            return self.parent.error('http', req, r,
                                     r.status, r.reason, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, _http_client.HTTPException):
            r = None
        except Exception:
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The tradeoff is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG: DEBUG.error("unexpected exception - closing " + \
                                  "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
                                 host, id(h))
            r = None
        else:
            if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Send the request line, headers and body of *req* on connection *h*.

        skip_host/skip_accept_encoding prevent putrequest() from emitting
        duplicates of headers the request already carries.  req.selector
        exists on Python 3 requests; get_selector() is the Python 2 API.
        """
        try:
            if req.data:
                data = req.data
                if hasattr(req, 'selector'):
                    h.putrequest(req.get_method() or 'POST', req.selector, skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
                else:
                    h.putrequest(req.get_method() or 'POST', req.get_selector(), skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
                if 'Content-type' not in req.headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'Content-length' not in req.headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                if hasattr(req, 'selector'):
                    h.putrequest(req.get_method() or 'GET', req.selector, skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
                else:
                    h.putrequest(req.get_method() or 'GET', req.get_selector(), skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
        except (socket.error, _http_client.HTTPException) as err:
            raise _urllib.error.URLError(err)

        if 'Connection' not in req.headers:
            h.putheader('Connection', 'keep-alive')

        # opener-level default headers first, then request-specific ones
        for args in self.parent.addheaders:
            if args[0] not in req.headers:
                h.putheader(*args)
        for k, v in req.headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.data:
            h.send(req.data)

    def _get_connection(self, host):
        # Subclasses must return a fresh connection object for *host*.
        raise NotImplementedError()
class HTTPHandler(KeepAliveHandler, _urllib.request.HTTPHandler):
    # Drop-in replacement for urllib's HTTPHandler that pools plain-HTTP
    # connections via the KeepAliveHandler machinery.
    def __init__(self):
        KeepAliveHandler.__init__(self)

    def http_open(self, req):
        # urllib entry point for http:// URLs; all the real work happens
        # in KeepAliveHandler.do_open().
        return self.do_open(req)

    def _get_connection(self, host):
        # Factory used by do_open() when no pooled connection is free.
        return HTTPConnection(host)
class HTTPSHandler(KeepAliveHandler, _urllib.request.HTTPSHandler):
    """Keep-alive capable replacement for urllib's HTTPS handler.

    An optional *ssl_factory* may be supplied to customize how HTTPS
    connections are created; otherwise the optional ``sslfactory``
    module is used when importable.
    """

    def __init__(self, ssl_factory=None):
        KeepAliveHandler.__init__(self)
        if not ssl_factory:
            # Best-effort: pick up the optional sslfactory helper module.
            try:
                import sslfactory
                ssl_factory = sslfactory.get_factory()
            except ImportError:
                pass
        self._ssl_factory = ssl_factory

    def https_open(self, req):
        # urllib entry point for https:// URLs.
        return self.do_open(req)

    def _get_connection(self, host):
        # Prefer the configured factory; fall back to the plain class.
        factory = self._ssl_factory
        if factory:
            return factory.get_https_connection(host)
        return HTTPSConnection(host)
class HTTPResponse(_http_client.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        # 'strict' is accepted for backward compatibility but unused here.
        if method:
            _http_client.HTTPResponse.__init__(self, sock, debuglevel, method=method)
        else:
            _http_client.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._method = method
        self._rbuf = b""          # readline() lookahead buffer
        self._rbufsize = 8096     # chunk size for refilling the buffer
        self._handler = None # inserted by the handler later
        self._host = None    # (same)
        self._url = None     # (same)
        self._connection = None # (same)

    # keep a handle on the unwrapped read so our read() can delegate to it
    _raw_read = _http_client.HTTPResponse.read

    def close(self):
        """Close the response and hand the connection back to the pool."""
        if self.fp:
            self.fp.close()
            self.fp = None
        if self._handler:
            # marks the underlying connection as ready for reuse
            self._handler._request_closed(self, self._host,
                                          self._connection)

    # Note: Patch for Python3 (otherwise, connections won't be reusable)
    def _close_conn(self):
        self.close()

    def close_connection(self):
        """Close the response AND tear down the underlying connection."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        """Like HTTPResponse.read(), but serves the readline buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and not amt is None:
            L = len(self._rbuf)
            if amt > L:
                # buffer is a prefix of the request; read the remainder below
                amt -= L
            else:
                # request fully satisfied from the buffer
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = b""
        return s

    def readline(self, limit=-1):
        """Return one line (up to *limit* bytes), buffering raw reads."""
        data = b""
        i = self._rbuf.find(b'\n')
        # refill the buffer until it holds a newline or 'limit' bytes
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new: break
            i = new.find(b'\n')
            if i >= 0: i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0: i = len(self._rbuf)
        else: i = i+1
        if 0 <= limit < len(self._rbuf): i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint = 0):
        """Return a list of lines; stop early once *sizehint* bytes are read."""
        total = 0
        lines = []
        while 1:
            line = self.readline()
            if not line: break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines
class HTTPConnection(_http_client.HTTPConnection):
    # use the modified response class, so responses get the readline()
    # buffering and the keep-alive pool bookkeeping defined above
    response_class = HTTPResponse
class HTTPSConnection(_http_client.HTTPSConnection):
    # same substitution as HTTPConnection, for the TLS variant
    response_class = HTTPResponse
#########################################################################
#####   TEST FUNCTIONS
#########################################################################
def error_handler(url):
    """Exercise both HANDLE_ERRORS modes against *url* and print the results."""
    global HANDLE_ERRORS
    saved = HANDLE_ERRORS
    handler = HTTPHandler()
    _urllib.request.install_opener(_urllib.request.build_opener(handler))
    labels = {0: 'off', 1: 'on'}
    for mode in (0, 1):
        print(" fancy error handling %s (HANDLE_ERRORS = %i)" % (labels[mode], mode))
        HANDLE_ERRORS = mode
        try:
            fo = _urllib.request.urlopen(url)
            fo.read()
            fo.close()
            # status/reason only exist on keepalive 200 responses
            try:
                status, reason = fo.status, fo.reason
            except AttributeError:
                status, reason = None, None
        except IOError as e:
            print(" EXCEPTION: %s" % e)
            raise
        else:
            print(" status = %s, reason = %s" % (status, reason))
    HANDLE_ERRORS = saved
    print("open connections:", handler.open_connections())
    handler.close_all()
def continuity(url):
    """Fetch *url* three different ways and print an md5 of each payload."""
    from hashlib import md5
    fmt = '%25s: %s'

    def report(label, payload):
        # One line per strategy: right-aligned label plus payload digest.
        print(fmt % (label, md5(payload).hexdigest()))

    # Baseline: the stock urllib opener.
    _urllib.request.install_opener(_urllib.request.build_opener())
    fo = _urllib.request.urlopen(url)
    body = fo.read()
    fo.close()
    report('normal urllib', body)

    # Same fetch through the keep-alive handler.
    _urllib.request.install_opener(_urllib.request.build_opener(HTTPHandler()))
    fo = _urllib.request.urlopen(url)
    body = fo.read()
    fo.close()
    report('keepalive read', body)

    # And once more, line by line, to exercise readline() buffering.
    fo = _urllib.request.urlopen(url)
    body = b''
    while True:
        line = fo.readline()
        if not line:
            break
        body += line
    fo.close()
    report('keepalive readline', body)
def comp(N, url):
    """Time N fetches of *url* with and without keep-alive; print the speedup."""
    print(' making %i connections to:\n %s' % (N, url))

    # Baseline timing with the stock urllib handlers.
    sys.stdout.write(' first using the normal urllib handlers')
    _urllib.request.install_opener(_urllib.request.build_opener())
    baseline = fetch(N, url)
    print(' TIME: %.3f s' % baseline)

    # Same workload through the keep-alive handler.
    sys.stdout.write(' now using the keepalive handler ')
    _urllib.request.install_opener(_urllib.request.build_opener(HTTPHandler()))
    keepalive = fetch(N, url)
    print(' TIME: %.3f s' % keepalive)
    print(' improvement factor: %.2f' % (baseline / keepalive, ))
def fetch(N, url, delay=0):
    """Fetch *url* N times (sleeping *delay* between fetches after the first),
    warn about inconsistent payload lengths, and return elapsed seconds."""
    import time
    sizes = []
    start = time.time()
    for attempt in _range(N):
        if delay and attempt > 0:
            time.sleep(delay)
        fo = _urllib.request.urlopen(url)
        body = fo.read()
        fo.close()
        sizes.append(len(body))
    elapsed = time.time() - start

    # Every read should return the same number of bytes as the first one.
    for idx, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (idx, size))

    return elapsed
def test_timeout(url):
    """Check that a connection dropped by an idle server is handled cleanly.

    Fetches *url*, waits 20 seconds for the server to close the keep-alive
    connection, fetches again, and reports whether the payloads match.
    """
    global DEBUG
    # BUG FIX: 'time' was previously only imported under the __main__ guard,
    # so calling this function from an importing module raised NameError.
    import time
    dbbackup = DEBUG

    class FakeLogger:
        # Minimal stand-in logger that routes every level to stdout.
        def debug(self, msg, *args): print(msg % args)
        info = warning = error = debug

    DEBUG = FakeLogger()
    print(" fetching the file to establish a connection")
    fo = _urllib.request.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print(" waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        # countdown rendered in place via carriage return
        sys.stdout.write('\r %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print(" fetching the file a second time")
    fo = _urllib.request.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print(' data are identical')
    else:
        print(' ERROR: DATA DIFFER')

    DEBUG = dbbackup
def test(url, N=10):
    """Run the full self-test suite: error handling, continuity, speed, timeout."""
    # fix: message previously read "error hander"
    print("checking error handler (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()
    print()
    print("performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print()
    print("performing speed comparison")
    comp(N, url)
    print()
    print("performing dropped-connection check")
    test_timeout(url)
if __name__ == '__main__':
    import time
    import sys
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # fix: was a bare 'except' that also swallowed SystemExit and
        # KeyboardInterrupt; only bad/missing arguments should print usage
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)