Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/urllib3/response.py
811 views
1
from __future__ import absolute_import
2
from contextlib import contextmanager
3
import zlib
4
import io
5
import logging
6
from socket import timeout as SocketTimeout
7
from socket import error as SocketError
8
9
try:
10
import brotli
11
except ImportError:
12
brotli = None
13
14
from ._collections import HTTPHeaderDict
15
from .exceptions import (
16
BodyNotHttplibCompatible,
17
ProtocolError,
18
DecodeError,
19
ReadTimeoutError,
20
ResponseNotChunked,
21
IncompleteRead,
22
InvalidHeader,
23
HTTPError,
24
)
25
from .packages.six import string_types as basestring, PY3
26
from .packages.six.moves import http_client as httplib
27
from .connection import HTTPException, BaseSSLError
28
from .util.response import is_fp_closed, is_response_to_head
29
30
log = logging.getLogger(__name__)
31
32
33
class DeflateDecoder(object):
    """Incremental decoder for ``deflate``-encoded content.

    Input is first fed to a default zlib decompressor. If zlib rejects it
    before any output was produced, everything buffered so far is replayed
    through a raw-deflate decompressor (``-zlib.MAX_WBITS``) instead.
    """

    def __init__(self):
        self._obj = zlib.decompressobj()
        # Input retained while the first attempt has not yet succeeded.
        self._data = b""
        self._first_try = True

    def __getattr__(self, name):
        # Attributes not defined on this wrapper are looked up on the
        # wrapped decompressor object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """Return the decompressed form of ``data``.

        Empty input is returned unchanged. ``zlib.error`` raised by the
        fallback (raw-deflate) decompressor is propagated to the caller.
        """
        if not data:
            return data

        if self._first_try:
            self._data += data
            try:
                output = self._obj.decompress(data)
            except zlib.error:
                # Switch to raw deflate and replay every byte seen so far.
                self._first_try = False
                self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
                try:
                    return self.decompress(self._data)
                finally:
                    self._data = None
            if output:
                # The first attempt produced output: stop buffering.
                self._first_try = False
                self._data = None
            return output

        return self._obj.decompress(data)
63
64
65
class GzipDecoderState(object):
    """Integer constants naming the states used by ``GzipDecoder``."""

    # FIRST_MEMBER: initial state of a new GzipDecoder.
    # OTHER_MEMBERS: set once input remained after a gzip member ended.
    # SWALLOW_DATA: set after a zlib error; later input is ignored.
    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)
70
71
72
class GzipDecoder(object):
    """Incremental gzip decoder that accepts several concatenated members.

    A zlib error in the first member is raised to the caller. A zlib error
    after the first member is treated as trailing garbage: the output
    gathered so far is returned and all later input is ignored.
    """

    def __init__(self):
        self._state = GzipDecoderState.FIRST_MEMBER
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def __getattr__(self, name):
        # Attributes not defined on this wrapper are looked up on the
        # wrapped decompressor object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """Return the decompressed bytes for ``data``."""
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return b""

        output = bytearray()
        pending = data
        while pending:
            try:
                output += self._obj.decompress(pending)
            except zlib.error:
                in_later_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if not in_later_member:
                    raise
                # Allow trailing garbage acceptable in other gzip clients
                break
            pending = self._obj.unused_data
            if pending:
                # Bytes remain after the member ended: start a fresh
                # decompressor for whatever follows.
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        return bytes(output)
100
101
102
if brotli is not None:

    class BrotliDecoder(object):
        """Wrapper around ``brotli.Decompressor``.

        Supports both the 'brotlipy' and 'Brotli' packages, since they
        share an import name: 'brotlipy' objects expose ``decompress``,
        while 'Brotli' objects are driven through ``process``.
        """

        def __init__(self):
            self._obj = brotli.Decompressor()

        def decompress(self, data):
            """Decompress ``data`` with whichever method the object has."""
            decompressor = self._obj
            if not hasattr(decompressor, "decompress"):
                return decompressor.process(data)
            return decompressor.decompress(data)

        def flush(self):
            """Flush the decompressor, or return ``b""`` if it has no flush."""
            decompressor = self._obj
            if not hasattr(decompressor, "flush"):
                return b""
            return decompressor.flush()
120
121
122
class MultiDecoder(object):
    """
    Chain of decoders for a comma-separated list of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes):
        self._decoders = []
        for mode in modes.split(","):
            self._decoders.append(_get_decoder(mode.strip()))

    def flush(self):
        """Flush the decoder of the first listed coding and return its output."""
        first = self._decoders[0]
        return first.flush()

    def decompress(self, data):
        """Run ``data`` through the decoders, last listed coding first."""
        for decoder in self._decoders[::-1]:
            data = decoder.decompress(data)
        return data
141
142
143
def _get_decoder(mode):
    """Return a new decoder object for the content-coding string ``mode``.

    A value containing a comma yields a ``MultiDecoder``; ``"gzip"`` yields
    a ``GzipDecoder``; ``"br"`` yields a ``BrotliDecoder`` when the brotli
    module is available. Anything else yields a ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        decoder_cls = GzipDecoder
    elif brotli is not None and mode == "br":
        decoder_cls = BrotliDecoder
    else:
        decoder_cls = DeflateDecoder
    return decoder_cls()
154
155
156
class HTTPResponse(io.IOBase):
157
"""
158
HTTP Response container.
159
160
Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
161
loaded and decoded on-demand when the ``data`` property is accessed. This
162
class is also compatible with the Python standard library's :mod:`io`
163
module, and can hence be treated as a readable object in the context of that
164
framework.
165
166
Extra parameters for behaviour not present in httplib.HTTPResponse:
167
168
:param preload_content:
169
If True, the response's body will be preloaded during construction.
170
171
:param decode_content:
172
If True, will attempt to decode the body based on the
173
'content-encoding' header.
174
175
:param original_response:
176
When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
177
object, it's convenient to include the original for debug purposes. It's
178
otherwise unused.
179
180
:param retries:
181
The retries contains the last :class:`~urllib3.util.retry.Retry` that
182
was used during the request.
183
184
:param enforce_content_length:
185
Enforce content length checking. Body returned by server must match
186
value of Content-Length header, if present. Otherwise, raise error.
187
"""
188
189
# Content codings that can be decoded; "br" is listed only when the brotli
# module was importable.
CONTENT_DECODERS = ["gzip", "deflate"] + (["br"] if brotli is not None else [])
# Status codes that get_redirect_location() treats as redirects.
REDIRECT_STATUSES = [301, 302, 303, 307, 308]
193
194
def __init__(
    self,
    body="",
    headers=None,
    status=0,
    version=0,
    reason=None,
    strict=0,
    preload_content=True,
    decode_content=True,
    original_response=None,
    pool=None,
    connection=None,
    msg=None,
    retries=None,
    enforce_content_length=False,
    request_method=None,
    request_url=None,
    auto_close=True,
):
    """Store the response metadata and, if requested, preload the body.

    ``body`` may be a string/bytes value (kept as the cached body) or an
    object with a ``read`` method (kept as the underlying file object).
    """
    # Reuse an existing HTTPHeaderDict; wrap anything else in a new one.
    if isinstance(headers, HTTPHeaderDict):
        self.headers = headers
    else:
        self.headers = HTTPHeaderDict(headers)

    self.status = status
    self.version = version
    self.reason = reason
    self.strict = strict
    self.decode_content = decode_content
    self.retries = retries
    self.enforce_content_length = enforce_content_length
    self.auto_close = auto_close
    self.msg = msg

    self._decoder = None
    self._original_response = original_response
    self._fp_bytes_read = 0
    self._request_url = request_url
    self._pool = pool
    self._connection = connection

    # A non-empty string/bytes body is the cached content; an object with
    # ``read`` is the source that content is pulled from.
    is_literal_body = body and isinstance(body, (basestring, bytes))
    self._body = body if is_literal_body else None
    self._fp = body if hasattr(body, "read") else None

    # Are we using the chunked-style of transfer encoding?
    self.chunk_left = None
    tr_enc = self.headers.get("transfer-encoding", "").lower()
    self.chunked = any(enc.strip() == "chunked" for enc in tr_enc.split(","))

    # Determine length of response
    self.length_remaining = self._init_length(request_method)

    # If requested, preload the body.
    if preload_content and not self._body:
        self._body = self.read(decode_content=decode_content)
260
261
def get_redirect_location(self):
    """
    Should we redirect and where to?

    :returns: ``False`` when the status is not one of
        ``REDIRECT_STATUSES``. Otherwise the value of the ``location``
        header, which is ``None`` when that header is absent.
    """
    if self.status not in self.REDIRECT_STATUSES:
        return False

    return self.headers.get("location")
273
274
def release_conn(self):
    """Return the held connection to the pool and forget it.

    Does nothing unless both a pool and a connection are set.
    """
    if self._pool and self._connection:
        self._pool._put_conn(self._connection)
        self._connection = None
280
281
def drain_conn(self):
    """
    Read and discard any remaining HTTP response data in the response connection.

    Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
    """
    try:
        self.read()
    except (HTTPError, SocketError, BaseSSLError, HTTPException):
        # Draining is best effort: these failures are deliberately ignored.
        return
291
292
@property
def data(self):
    """Return the cached body, reading (and caching) it on first access.

    Returns ``None`` when there is neither a cached body nor a file object.
    """
    # For backwards-compat with urllib3 0.4 and earlier.
    body = self._body
    if body:
        return body

    if not self._fp:
        return None
    return self.read(cache_content=True)
300
301
@property
def connection(self):
    """The connection object currently held by this response, or ``None``."""
    return self._connection
304
305
def isclosed(self):
    """Report whether the underlying file object is closed, per ``is_fp_closed``."""
    return is_fp_closed(self._fp)
307
308
def tell(self):
    """
    Return the number of bytes pulled over the wire so far.

    This may differ from the amount of content returned by
    :meth:``HTTPResponse.read`` if bytes are encoded on the wire
    (e.g, compressed).
    """
    return self._fp_bytes_read
315
316
def _init_length(self, request_method):
317
"""
318
Set initial length value for Response content if available.
319
"""
320
length = self.headers.get("content-length")
321
322
if length is not None:
323
if self.chunked:
324
# This Response will fail with an IncompleteRead if it can't be
325
# received as chunked. This method falls back to attempt reading
326
# the response before raising an exception.
327
log.warning(
328
"Received response with both Content-Length and "
329
"Transfer-Encoding set. This is expressly forbidden "
330
"by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
331
"attempting to process response as Transfer-Encoding: "
332
"chunked."
333
)
334
return None
335
336
try:
337
# RFC 7230 section 3.3.2 specifies multiple content lengths can
338
# be sent in a single Content-Length header
339
# (e.g. Content-Length: 42, 42). This line ensures the values
340
# are all valid ints and that as long as the `set` length is 1,
341
# all values are the same. Otherwise, the header is invalid.
342
lengths = set([int(val) for val in length.split(",")])
343
if len(lengths) > 1:
344
raise InvalidHeader(
345
"Content-Length contained multiple "
346
"unmatching values (%s)" % length
347
)
348
length = lengths.pop()
349
except ValueError:
350
length = None
351
else:
352
if length < 0:
353
length = None
354
355
# Convert status to int for comparison
356
# In some cases, httplib returns a status of "_UNKNOWN"
357
try:
358
status = int(self.status)
359
except ValueError:
360
status = 0
361
362
# Check for responses that shouldn't include a body
363
if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
364
length = 0
365
366
return length
367
368
def _init_decoder(self):
369
"""
370
Set-up the _decoder attribute if necessary.
371
"""
372
# Note: content-encoding value should be case-insensitive, per RFC 7230
373
# Section 3.2
374
content_encoding = self.headers.get("content-encoding", "").lower()
375
if self._decoder is None:
376
if content_encoding in self.CONTENT_DECODERS:
377
self._decoder = _get_decoder(content_encoding)
378
elif "," in content_encoding:
379
encodings = [
380
e.strip()
381
for e in content_encoding.split(",")
382
if e.strip() in self.CONTENT_DECODERS
383
]
384
if len(encodings):
385
self._decoder = _get_decoder(content_encoding)
386
387
# Exception types that _decode() converts into DecodeError; brotli's error
# type is included only when the brotli module was importable.
DECODER_ERROR_CLASSES = (IOError, zlib.error) + (
    (brotli.error,) if brotli is not None else ()
)
390
391
def _decode(self, data, decode_content, flush_decoder):
392
"""
393
Decode the data passed in and potentially flush the decoder.
394
"""
395
if not decode_content:
396
return data
397
398
try:
399
if self._decoder:
400
data = self._decoder.decompress(data)
401
except self.DECODER_ERROR_CLASSES as e:
402
content_encoding = self.headers.get("content-encoding", "").lower()
403
raise DecodeError(
404
"Received response with content-encoding: %s, but "
405
"failed to decode it." % content_encoding,
406
e,
407
)
408
if flush_decoder:
409
data += self._flush_decoder()
410
411
return data
412
413
def _flush_decoder(self):
414
"""
415
Flushes the decoder. Should only be called if the decoder is actually
416
being used.
417
"""
418
if self._decoder:
419
buf = self._decoder.decompress(b"")
420
return buf + self._decoder.flush()
421
422
return b""
423
424
@contextmanager
def _error_catcher(self):
    """
    Catch low-level python exceptions, instead re-raising urllib3
    variants, so that low-level exceptions are not leaked in the
    high-level api.

    Translations performed:

    * ``SocketTimeout`` -> ``ReadTimeoutError``
    * ``BaseSSLError`` whose message contains "read operation timed out"
      -> ``ReadTimeoutError``; any other ``BaseSSLError`` is re-raised
      unchanged
    * ``HTTPException`` / ``SocketError`` -> ``ProtocolError``

    On exit: if the wrapped block did not finish normally, the original
    response and the connection are closed. Then, if the original
    response is closed, the connection is released back to the pool.
    """
    # Stays False unless the wrapped block completes without raising.
    clean_exit = False

    try:
        try:
            yield

        except SocketTimeout:
            # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
            # there is yet no clean way to get at it from this context.
            raise ReadTimeoutError(self._pool, None, "Read timed out.")

        except BaseSSLError as e:
            # FIXME: Is there a better way to differentiate between SSLErrors?
            if "read operation timed out" not in str(e):  # Defensive:
                # This shouldn't happen but just in case we're missing an edge
                # case, let's avoid swallowing SSL errors.
                raise

            raise ReadTimeoutError(self._pool, None, "Read timed out.")

        except (HTTPException, SocketError) as e:
            # This includes IncompleteRead.
            raise ProtocolError("Connection broken: %r" % e, e)

        # If no exception is thrown, we should avoid cleaning up
        # unnecessarily.
        clean_exit = True
    finally:
        # If we didn't terminate cleanly, we need to throw away our
        # connection.
        if not clean_exit:
            # The response may not be closed but we're not going to use it
            # anymore so close it now to ensure that the connection is
            # released back to the pool.
            if self._original_response:
                self._original_response.close()

            # Closing the response may not actually be sufficient to close
            # everything, so if we have a hold of the connection close that
            # too.
            if self._connection:
                self._connection.close()

        # If we hold the original response but it's closed now, we should
        # return the connection back to the pool.
        if self._original_response and self._original_response.isclosed():
            self.release_conn()
480
481
def read(self, amt=None, decode_content=None, cache_content=False):
    """
    Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
    parameters: ``decode_content`` and ``cache_content``.

    :param amt:
        How much of the content to read. If specified, caching is skipped
        because it doesn't make sense to cache partial content as the full
        response.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. ``None`` means "use the value given
        at construction time".

    :param cache_content:
        If True, will save the returned data such that the same result is
        returned despite of the state of the underlying file object. This
        is useful if you want the ``.data`` property to continue working
        after having ``.read()`` the file object. (Overridden if ``amt`` is
        set.)

    :returns: the data read, or ``None`` when there is no underlying
        file object.
    """
    self._init_decoder()
    if decode_content is None:
        decode_content = self.decode_content

    fp = self._fp
    if fp is None:
        return

    already_closed = getattr(fp, "closed", False)
    read_everything = amt is None
    # Reading to EOF finishes the body, so the decoder is always flushed.
    flush_decoder = read_everything
    if not read_everything:
        # A partial read can never stand in for the full cached body.
        cache_content = False

    with self._error_catcher():
        if already_closed:
            data = b""
        elif read_everything:
            # cStringIO doesn't like amt=None
            data = fp.read()
        else:
            data = fp.read(amt)

        if not read_everything and amt != 0 and not data:
            # Platform-specific: Buggy versions of Python.
            # Close the connection when no data is returned
            #
            # This is redundant to what httplib/http.client _should_
            # already do. However, versions of python released before
            # December 15, 2012 (http://bugs.python.org/issue16298) do
            # not properly close the connection in all cases. There is
            # no harm in redundantly calling close.
            fp.close()
            flush_decoder = True
            if self.enforce_content_length and self.length_remaining not in (
                0,
                None,
            ):
                # This is an edge case that httplib failed to cover due
                # to concerns of backward compatibility. We're
                # addressing it here to make sure IncompleteRead is
                # raised during streaming, so all calls with incorrect
                # Content-Length are caught.
                raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

    if data:
        received = len(data)
        self._fp_bytes_read += received
        if self.length_remaining is not None:
            self.length_remaining -= received

        data = self._decode(data, decode_content, flush_decoder)

        if cache_content:
            self._body = data

    return data
554
555
def stream(self, amt=2 ** 16, decode_content=None):
    """
    A generator wrapper for the read() method. A call will block until
    ``amt`` bytes have been read from the connection or until the
    connection is closed.

    :param amt:
        How much of the content to read. The generator will return up to
        that much data per iteration, but may return less. This is
        particularly likely when using compressed data. However, the empty
        string will never be returned.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if self.chunked and self.supports_chunked_reads():
        # Chunked bodies are delegated to the chunk-aware reader.
        for piece in self.read_chunked(amt, decode_content=decode_content):
            yield piece
        return

    while not is_fp_closed(self._fp):
        piece = self.read(amt=amt, decode_content=decode_content)

        if piece:
            yield piece
580
581
@classmethod
def from_httplib(ResponseCls, r, **response_kw):
    """
    Given an :class:`httplib.HTTPResponse` instance ``r``, return a
    corresponding :class:`urllib3.response.HTTPResponse` object.

    Remaining parameters are passed to the HTTPResponse constructor, along
    with ``original_response=r``.
    """
    headers = r.msg

    if not isinstance(headers, HTTPHeaderDict):
        # Python 3 messages expose ``items()``; Python 2.7 messages go
        # through the dedicated ``from_httplib`` constructor.
        headers = (
            HTTPHeaderDict(headers.items())
            if PY3
            else HTTPHeaderDict.from_httplib(headers)
        )

    return ResponseCls(
        body=r,
        headers=headers,
        status=r.status,
        version=r.version,
        reason=r.reason,
        # HTTPResponse objects in Python 3 don't have a .strict attribute
        strict=getattr(r, "strict", 0),
        original_response=r,
        **response_kw
    )
612
613
# Backwards-compatibility methods for httplib.HTTPResponse
614
def getheaders(self):
    """Return the response's headers object (httplib-compatible accessor)."""
    return self.headers
616
617
def getheader(self, name, default=None):
    """Return the header ``name``, or ``default`` when it is not present."""
    return self.headers.get(name, default)
619
620
# Backwards compatibility for http.cookiejar
621
def info(self):
    """Return the response's headers object."""
    return self.headers
623
624
# Overrides from io.IOBase
625
def close(self):
    """Close the underlying file object and the held connection.

    When ``auto_close`` is disabled, the ``io.IOBase`` closed flag is also
    set, since ``closed`` reports that flag in that mode.
    """
    # ``closed`` can be False while ``_fp`` is None (with ``auto_close``
    # disabled it reports the IOBase flag and ignores ``_fp``), so check
    # for a file object before closing it.
    if not self.closed and self._fp is not None:
        self._fp.close()

    if self._connection:
        self._connection.close()

    if not self.auto_close:
        io.IOBase.close(self)
634
635
@property
def closed(self):
    """Whether this response counts as closed.

    With ``auto_close`` disabled this is the plain ``io.IOBase`` flag.
    Otherwise it follows the underlying file object: ``isclosed()`` is
    preferred, then the ``closed`` attribute. A missing file object, or
    one with neither, counts as closed.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True
647
648
def fileno(self):
    """Return the file descriptor of the underlying file object.

    :raises IOError: if there is no file object, or it has no ``fileno``.
    """
    fp = self._fp
    if fp is None:
        raise IOError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise IOError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()
658
659
def flush(self):
    """Flush the underlying file object when it exists, supports it, and is open.

    Returns whatever the file object's ``flush`` returns, otherwise ``None``.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush") or getattr(fp, "closed", False):
        return None
    return fp.flush()
666
667
def readable(self):
    """Always ``True``; required for `io` module compatibility."""
    return True
670
671
def readinto(self, b):
    """Read up to ``len(b)`` bytes into the buffer ``b``.

    This method is required for `io` module compatibility.

    :param b: writable buffer supporting ``len()`` and slice assignment.
    :returns: the number of bytes stored in ``b``; ``0`` when nothing was read.
    """
    temp = self.read(len(b))
    # ``read`` returns None when there is no underlying file object;
    # report that as "nothing read" instead of failing on ``len(None)``.
    if not temp:
        return 0

    count = len(temp)
    b[:count] = temp
    return count
679
680
def supports_chunked_reads(self):
    """
    Report whether the underlying file-like object looks like an
    httplib.HTTPResponse object.

    The only test is the presence of an ``fp`` attribute; if it is there
    we assume it returns raw chunks as processed by read_chunked().
    """
    return hasattr(self._fp, "fp")
688
689
def _update_chunk_length(self):
690
# First, we'll figure out length of a chunk and then
691
# we'll try to read it from socket.
692
if self.chunk_left is not None:
693
return
694
line = self._fp.fp.readline()
695
line = line.split(b";", 1)[0]
696
try:
697
self.chunk_left = int(line, 16)
698
except ValueError:
699
# Invalid chunked protocol response, abort.
700
self.close()
701
raise httplib.IncompleteRead(line)
702
703
def _handle_chunk(self, amt):
704
returned_chunk = None
705
if amt is None:
706
chunk = self._fp._safe_read(self.chunk_left)
707
returned_chunk = chunk
708
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
709
self.chunk_left = None
710
elif amt < self.chunk_left:
711
value = self._fp._safe_read(amt)
712
self.chunk_left = self.chunk_left - amt
713
returned_chunk = value
714
elif amt == self.chunk_left:
715
value = self._fp._safe_read(amt)
716
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
717
self.chunk_left = None
718
returned_chunk = value
719
else: # amt > self.chunk_left
720
returned_chunk = self._fp._safe_read(self.chunk_left)
721
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
722
self.chunk_left = None
723
return returned_chunk
724
725
def read_chunked(self, amt=None, decode_content=None):
    """
    Generator over the body of a chunked response, one piece at a time.

    :param amt:
        Upper bound on how much of a chunk is read per iteration;
        ``None`` reads each chunk whole.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked: if the response is not chunked.
    :raises BodyNotHttplibCompatible: if the body has no ``fp`` attribute.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be httplib.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        original = self._original_response
        # Don't bother reading the body of a HEAD request.
        if original and is_response_to_head(original):
            original.close()
            return

        # If a response is already read and closed
        # then return immediately.
        if self._fp.fp is None:
            return

        self._update_chunk_length()
        while self.chunk_left != 0:
            raw = self._handle_chunk(amt)
            decoded = self._decode(
                raw, decode_content=decode_content, flush_decoder=False
            )
            if decoded:
                yield decoded
            self._update_chunk_length()

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            decoded = self._flush_decoder()
            if decoded:  # Platform-specific: Jython.
                yield decoded

        # Chunk content ends with \r\n: discard it. Stop at end of input
        # too, because some sites may not end with '\r\n'.
        while True:
            line = self._fp.fp.readline()
            if not line or line == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()
794
795
def geturl(self):
    """
    Returns the URL that was the source of this response.

    If the retry history is non-empty, the ``redirect_location`` of its
    last entry is returned; otherwise the original request URL.
    """
    retries = self.retries
    if retries is not None and len(retries.history):
        return retries.history[-1].redirect_location
    return self._request_url
805
806
def __iter__(self):
807
buffer = []
808
for chunk in self.stream(decode_content=True):
809
if b"\n" in chunk:
810
chunk = chunk.split(b"\n")
811
yield b"".join(buffer) + chunk[0] + b"\n"
812
for x in chunk[1:-1]:
813
yield x + b"\n"
814
if chunk[-1]:
815
buffer = [chunk[-1]]
816
else:
817
buffer = []
818
else:
819
buffer.append(chunk)
820
if buffer:
821
yield b"".join(buffer)
822
823