Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/requests/utils.py
811 views
1
# -*- coding: utf-8 -*-
2
3
"""
4
requests.utils
5
~~~~~~~~~~~~~~
6
7
This module provides utility functions that are used within Requests
8
that are also useful for external consumption.
9
"""
10
11
import codecs
12
import contextlib
13
import io
14
import os
15
import re
16
import socket
17
import struct
18
import sys
19
import tempfile
20
import warnings
21
import zipfile
22
from collections import OrderedDict
23
24
from .__version__ import __version__
25
from . import certs
26
# to_native_string is unused here, but imported here for backwards compatibility
27
from ._internal_utils import to_native_string
28
from .compat import parse_http_list as _parse_list_header
29
from .compat import (
30
quote, urlparse, bytes, str, unquote, getproxies,
31
proxy_bypass, urlunparse, basestring, integer_types, is_py3,
32
proxy_bypass_environment, getproxies_environment, Mapping)
33
from .cookies import cookiejar_from_dict
34
from .structures import CaseInsensitiveDict
35
from .exceptions import (
36
InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
37
38
# Candidate netrc file names; get_netrc_auth() probes each one under the
# user's home directory ('~') and uses the first that exists.
NETRC_FILES = ('.netrc', '_netrc')
39
40
# Value returned by certs.where(), evaluated once when this module is imported.
DEFAULT_CA_BUNDLE_PATH = certs.where()
41
42
# Default port numbers keyed by URL scheme.
DEFAULT_PORTS = {'http': 80, 'https': 443}
43
44
45
if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        """Return True if ``host`` matches the Windows ProxyOverride list.

        Reads ``ProxyEnable`` and ``ProxyOverride`` from the current user's
        "Internet Settings" registry key. Returns False when the registry
        module cannot be imported, the values cannot be read or parsed,
        the proxy is disabled, or no override entry matches.

        :param host: host name to test against the override entries.
        :rtype: bool
        """
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(
                winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except (OSError, ValueError):
            # OSError: key/value missing or unreadable.
            # ValueError: a REG_SZ ProxyEnable that is empty or non-numeric
            # cannot be converted by int(); treat it as "no bypass".
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means "any host name without a dot".
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
97
98
99
def dict_to_sequence(d):
    """Return ``d.items()`` when ``d`` has an ``items`` attribute, else ``d`` itself."""
    return d.items() if hasattr(d, 'items') else d
106
107
108
def super_len(o):
    """Return how much of ``o`` is left to be read.

    The total size is taken from ``len(o)``, else an ``o.len`` attribute,
    else ``os.fstat`` on ``o.fileno()``. If none of those produced a size
    and the object supports ``tell``/``seek``, the size is found by
    seeking to the end and back. The current ``tell()`` position is then
    subtracted so partially consumed objects report only the remainder.

    :param o: a sized object or file-like object.
    :return: remaining length, never negative; 0 if it cannot be determined.
    :rtype: int
    """
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except (io.UnsupportedOperation, AttributeError):
            # io.UnsupportedOperation: in-memory streams without a real
            # descriptor. AttributeError: wrappers whose fileno() delegates
            # to an underlying object that lacks one. Either way, fall
            # through to the tell/seek based detection below.
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no useable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
167
168
169
def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc.

    Looks for the first existing file named in ``NETRC_FILES`` under the
    user's home directory and asks it for the authenticators of the URL's
    host.

    :param url: URL whose host is looked up in the netrc file.
    :param raise_errors: when true, re-raise netrc parse errors and I/O
        errors instead of silently skipping netrc auth.
    :return: ``(login, password)`` if an entry is found, otherwise ``None``.
    """

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        # Use the parsed host name rather than splitting netloc on ':'.
        # Splitting netloc keeps any "user:password@" prefix and cuts IPv6
        # literals at their first colon, so the lookup could be performed
        # for a machine other than the one the request is sent to.
        host = urlparse(url).hostname

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass
218
219
220
def guess_filename(obj):
    """Return the base name of ``obj.name``, or None if it is not usable.

    A name is usable when it is a non-empty string that neither starts
    with '<' nor ends with '>'.
    """
    name = getattr(obj, 'name', None)
    if not name or not isinstance(name, basestring):
        return None
    if name[0] == '<' or name[-1] == '>':
        return None
    return os.path.basename(name)
226
227
228
def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.

    :param path: filesystem path, possibly pointing inside a zip archive.
    :return: ``path`` itself, or the path of the member extracted below
        ``tempfile.gettempdir()``.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        if not prefix:
            # os.path.split() made no progress (archive is a bare root or
            # drive that does not exist); without this check the loop
            # would never terminate.
            break
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    # The context manager closes the archive handle on every exit path.
    with zipfile.ZipFile(archive) as zip_file:
        if member not in zip_file.namelist():
            return path

        # we have a valid zip archive and a valid member of that archive
        tmp = tempfile.gettempdir()
        extracted_path = os.path.join(tmp, *member.split('/'))
        if not os.path.exists(extracted_path):
            extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path
258
259
260
def from_key_val_list(value):
    """Build an OrderedDict from a mapping or an iterable of 2-tuples.

    ``None`` is passed through unchanged; strings, bytes, bools and ints
    are rejected, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    rejected_types = (str, bytes, bool, int)
    if isinstance(value, rejected_types):
        raise ValueError('cannot encode objects that are not 2-tuples')

    ordered = OrderedDict(value)
    return ordered
285
286
287
def to_key_val_list(value):
    """Build a list of items from a mapping or an iterable.

    ``None`` is passed through unchanged; strings, bytes, bools and ints
    are rejected, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    rejected_types = (str, bytes, bool, int)
    if isinstance(value, rejected_types):
        raise ValueError('cannot encode objects that are not 2-tuples')

    # Mappings contribute their (key, value) pairs; anything else is
    # consumed as-is.
    pairs = value.items() if isinstance(value, Mapping) else value
    return list(pairs)
314
315
316
# From mitsuhiko/werkzeug (used with permission).
317
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    Splits a comma-separated header into its elements. Elements may be
    quoted-strings (which may themselves contain commas); an element that
    is wrapped in double quotes has the quotes removed and is unquoted
    with :func:`unquote_header_value`. Duplicates and case are preserved.

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    def _strip_quotes(element):
        # Only elements that both start and end with '"' are unquoted.
        if element[:1] == element[-1:] == '"':
            return unquote_header_value(element[1:-1])
        return element

    return [_strip_quotes(element) for element in _parse_list_header(value)]
346
347
348
# From mitsuhiko/werkzeug (used with permission).
349
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    A key that has no ``=`` maps to `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    parsed = {}
    for entry in _parse_list_header(value):
        key, separator, raw = entry.partition('=')
        if not separator:
            # No '=' at all: record the bare token with no value.
            parsed[entry] = None
            continue
        if raw[:1] == raw[-1:] == '"':
            raw = unquote_header_value(raw[1:-1])
        parsed[key] = raw
    return parsed
381
382
383
# From mitsuhiko/werkzeug (used with permission).
384
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :param is_filename: when true, a value that looks like a UNC path
        (leading ``\\``) is returned without backslash un-escaping.
    :rtype: str
    """
    # Values that are empty or not wrapped in double quotes pass through.
    if not value or not (value[0] == value[-1] == '"'):
        return value

    # this is not the real unquoting, but fixing this so that the
    # RFC is met will result in bugs with internet explorer and
    # probably some other browsers as well.  IE for example is
    # uploading files with "C:\foo\bar.txt" as filename
    inner = value[1:-1]

    # For filenames that look like a UNC path, skip the replace sequence:
    # it would collapse the leading double backslash into a single one.
    if is_filename and inner[:2] == '\\\\':
        return inner

    return inner.replace('\\\\', '\\').replace('\\"', '"')
407
408
409
def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    Each cookie contributes ``cookie.name -> cookie.value``; when several
    cookies share a name, the one iterated last wins.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """
    return {cookie.name: cookie.value for cookie in cj}
422
423
424
def add_dict_to_cookiejar(cj, cookie_dict):
    """Insert the entries of a key/value dictionary into a CookieJar.

    Delegates to ``cookiejar_from_dict(cookie_dict, cj)`` and returns its
    result.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """
    updated_jar = cookiejar_from_dict(cookie_dict, cj)
    return updated_jar
433
434
435
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    Collects, in this order, every match of: an HTML ``<meta charset=...>``
    declaration, an HTML ``<meta content=...charset=...>`` pragma, and an
    XML ``encoding=`` declaration at the start of the content. Emits a
    DeprecationWarning on use.

    :param content: string to extract encodings from.
    :rtype: list
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    patterns = (
        re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I),
        re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I),
        re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]'),
    )

    found = []
    for pattern in patterns:
        found.extend(pattern.findall(content))
    return found
453
454
455
def _parse_content_type_header(header):
456
"""Returns content type and parameters from given header
457
458
:param header: string
459
:return: tuple containing content type and dictionary of
460
parameters
461
"""
462
463
tokens = header.split(';')
464
content_type, params = tokens[0].strip(), tokens[1:]
465
params_dict = {}
466
items_to_strip = "\"' "
467
468
for param in params:
469
param = param.strip()
470
if param:
471
key, value = param, True
472
index_of_equals = param.find("=")
473
if index_of_equals != -1:
474
key = param[:index_of_equals].strip(items_to_strip)
475
value = param[index_of_equals + 1:].strip(items_to_strip)
476
params_dict[key.lower()] = value
477
return content_type, params_dict
478
479
480
def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    Uses the ``charset`` parameter of the ``content-type`` header when
    present. Otherwise returns ``'ISO-8859-1'`` for content types that
    contain ``'text'``, and ``None`` in every other case.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """
    raw_header = headers.get('content-type')
    if not raw_header:
        return None

    media_type, parameters = _parse_content_type_header(raw_header)

    if 'charset' in parameters:
        charset = parameters['charset']
        return charset.strip("'\"")

    return 'ISO-8859-1' if 'text' in media_type else None
499
500
501
def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator.

    When ``r.encoding`` is None the items are yielded unchanged. Otherwise
    each chunk is fed to an incremental decoder for that encoding (with
    ``errors='replace'``) and every non-empty decoded piece is yielded,
    followed by whatever the decoder flushes at the end.
    """
    if r.encoding is None:
        for raw in iterator:
            yield raw
        return

    decoder_factory = codecs.getincrementaldecoder(r.encoding)
    decoder = decoder_factory(errors='replace')
    for raw in iterator:
        text = decoder.decode(raw)
        if text:
            yield text

    # Flush bytes still buffered by the decoder.
    tail = decoder.decode(b'', final=True)
    if tail:
        yield tail
517
518
519
def iter_slices(string, slice_length):
    """Iterate over slices of a string.

    Yields consecutive pieces of at most ``slice_length`` items. A
    ``slice_length`` of None, zero or a negative number yields the whole
    string as one slice. An empty string yields nothing.
    """
    total = len(string)
    if slice_length is None or slice_length <= 0:
        slice_length = total
    if not total:
        # Nothing to yield (also keeps range() from seeing a zero step).
        return
    for start in range(0, total, slice_length):
        yield string[start:start + slice_length]
527
528
529
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. decoding ``r.content`` with the charset from content-type
    2. decoding again with ``errors='replace'``; if that raises
       ``TypeError`` the raw ``r.content`` is returned

    Emits a DeprecationWarning on use.

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    attempted = []

    # Try charset from content-type
    charset = get_encoding_from_headers(r.headers)
    if charset:
        try:
            return str(r.content, charset)
        except UnicodeError:
            attempted.append(charset)

    # Fall back:
    try:
        decoded = str(r.content, charset, errors='replace')
    except TypeError:
        return r.content
    return decoded
563
564
565
# The unreserved URI characters (RFC 3986)
566
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._~"
)
568
569
570
def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    Raises ``InvalidURL`` when a ``%`` is followed by two alphanumeric
    characters that are not valid hexadecimal.

    :rtype: str
    """
    pieces = uri.split('%')
    rebuilt = [pieces[0]]
    for piece in pieces[1:]:
        hex_pair = piece[0:2]
        if not (len(hex_pair) == 2 and hex_pair.isalnum()):
            # Not a candidate escape sequence: keep the '%' as it was.
            rebuilt.append('%' + piece)
            continue

        try:
            char = chr(int(hex_pair, 16))
        except ValueError:
            raise InvalidURL("Invalid percent-escape sequence: '%s'" % hex_pair)

        if char in UNRESERVED_SET:
            rebuilt.append(char + piece[2:])
        else:
            rebuilt.append('%' + piece)
    return ''.join(rebuilt)
592
593
594
def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        unquoted = unquote_unreserved(uri)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)

    # Then quote only illegal characters (do not quote reserved,
    # unreserved, or '%')
    return quote(unquoted, safe=safe_with_percent)
614
615
616
def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :param ip: dotted IPv4 address.
    :param net: network in ``address/bits`` notation.
    :rtype: bool
    """
    def _packed_to_int(dotted):
        # Interpret the 4 packed address bytes as one unsigned integer.
        return struct.unpack('=L', socket.inet_aton(dotted))[0]

    ipaddr = _packed_to_int(ip)
    netaddr, bits = net.split('/')
    netmask = _packed_to_int(dotted_netmask(int(bits)))
    network = _packed_to_int(netaddr) & netmask
    return (ipaddr & netmask) == (network & netmask)
629
630
631
def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    host_bits = 32 - mask
    # Clear the low ``host_bits`` bits of an all-ones 32-bit value.
    bits = 0xffffffff ^ ((1 << host_bits) - 1)
    return socket.inet_ntoa(struct.pack('>I', bits))
640
641
642
def is_ipv4_address(string_ip):
    """Return True when ``socket.inet_aton`` accepts ``string_ip``.

    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    else:
        return True
651
652
653
def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    Accepts exactly one ``/``, an integer mask between 1 and 32, and an
    address part that ``socket.inet_aton`` can parse.

    :rtype: bool
    """
    if string_network.count('/') != 1:
        return False

    address, prefix = string_network.split('/')

    try:
        mask = int(prefix)
    except ValueError:
        return False

    if not 1 <= mask <= 32:
        return False

    try:
        socket.inet_aton(address)
    except socket.error:
        return False
    return True
675
676
677
@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'. A variable that did not exist
    before is deleted again on exit.

    If 'value' is None, do nothing"""
    if value is None:
        # Nothing to set, so nothing to restore either.
        yield
        return

    previous = os.environ.get(env_name)
    os.environ[env_name] = value
    try:
        yield
    finally:
        if previous is None:
            del os.environ[env_name]
        else:
            os.environ[env_name] = previous
697
698
699
def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :param url: URL about to be requested.
    :param no_proxy: comma-separated no_proxy entries, or None to read the
        ``no_proxy`` / ``NO_PROXY`` environment variables.
    :rtype: bool
    """
    def _env_lookup(key):
        # Prioritize lowercase environment variables over uppercase
        # to keep a consistent behaviour with other http projects (curl, wget).
        return os.environ.get(key) or os.environ.get(key.upper())

    # Remember the caller's argument: it is what gets exported to the
    # environment for the proxy_bypass() call further down.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = _env_lookup('no_proxy')

    parsed = urlparse(url)
    hostname = parsed.hostname
    if hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        entries = [
            entry for entry in no_proxy.replace(' ', '').split(',') if entry
        ]

        if is_ipv4_address(hostname):
            for entry in entries:
                if is_valid_cidr(entry):
                    if address_in_network(hostname, entry):
                        return True
                elif hostname == entry:
                    # The no_proxy entry is a plain IP (not CIDR notation)
                    # equal to the requested host.
                    return True
        else:
            host_with_port = hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for entry in entries:
                # The URL does match something in no_proxy, so we don't want
                # to apply the proxies on this URL.
                if hostname.endswith(entry) or host_with_port.endswith(entry):
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        try:
            bypass = proxy_bypass(hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    return True if bypass else False
758
759
760
def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    The dict is empty when ``should_bypass_proxies`` says the URL must not
    go through a proxy; otherwise it is the result of ``getproxies()``.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    return getproxies()
770
771
772
def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    Lookup order: ``scheme://host``, ``scheme``, ``all://host``, ``all``.
    For URLs without a host only ``scheme`` and then ``all`` are tried.

    :param url: The url being for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    :return: the first matching proxy URL, or None.
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    scheme, hostname = urlparts.scheme, urlparts.hostname

    if hostname is None:
        return proxies.get(scheme, proxies.get('all'))

    # Most specific key first.
    candidates = (
        scheme + '://' + hostname,
        scheme,
        'all://' + hostname,
        'all',
    )
    for candidate in candidates:
        if candidate in proxies:
            return proxies[candidate]
    return None
796
797
798
def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    The result has the form ``<name>/<__version__>``.

    :rtype: str
    """
    agent = '%s/%s' % (name, __version__)
    return agent
805
806
807
def default_headers():
    """
    Build the default header set: User-Agent, Accept-Encoding, Accept and
    Connection.

    :rtype: requests.structures.CaseInsensitiveDict
    """
    headers = {
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    }
    return CaseInsensitiveDict(headers)
817
818
819
def parse_header_links(value):
    """Return a list of parsed link headers proxies.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    Each link becomes a dict with a ``url`` key plus one key per
    ``name=value`` parameter. Parameter values may themselves contain
    ``=``. Parsing of a link's parameters stops at the first parameter
    that has no ``=`` at all.

    :param value: the raw Link header value.
    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                # Split on the first '=' only: a value such as
                # title="a=b" must not abort parsing of this link.
                key, param_value = param.split('=', 1)
            except ValueError:
                break

            link[key.strip(replace_chars)] = param_value.strip(replace_chars)

        links.append(link)

    return links
854
855
856
# Null bytes; no need to recreate these on each call to guess_json_utf
857
_null = '\x00'.encode('ascii') # encoding to ASCII for Python 3
858
_null2 = _null * 2
859
_null3 = _null * 3
860
861
862
def guess_json_utf(data):
    """Guess the UTF encoding of JSON bytes from their first four bytes.

    Returns a codec name, or None when the pattern of null bytes matches
    no supported encoding.

    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]

    if sample == codecs.BOM_UTF32_LE or sample == codecs.BOM_UTF32_BE:
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included

    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'

    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    elif nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character

    return None
892
893
894
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    # Order of parts: scheme, netloc, path, params, query, fragment.
    parts = list(urlparse(url, new_scheme))

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not parts[1]:
        parts[1], parts[2] = parts[2], parts[1]

    return urlunparse(tuple(parts))
909
910
911
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password.

    Both components are percent-decoded. ``('', '')`` is returned when
    decoding either component raises ``AttributeError`` or ``TypeError``.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        username = unquote(parsed.username)
        password = unquote(parsed.password)
    except (AttributeError, TypeError):
        return ('', '')

    return (username, password)
925
926
927
# Moved outside of function to avoid recompile every call
928
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
929
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')
930
931
932
def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    Raises ``InvalidHeader`` when the value fails the check or is neither
    str nor bytes.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    # Pick the pattern whose type matches the value being checked.
    pat = (_CLEAN_HEADER_REGEX_BYTE if isinstance(value, bytes)
           else _CLEAN_HEADER_REGEX_STR)
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))
951
952
953
def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, _fragment = urlparse(url)

    # urlparse may report an empty netloc and put the host into the path;
    # swap them in that case (same handling as prepend_scheme_if_needed).
    if not netloc:
        netloc, path = path, netloc

    # Everything up to the last '@' is the authentication part.
    host_part = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, host_part, path, params, query, ''))
968
969
970
def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.

    Raises ``UnrewindableBodyError`` when the body has no ``seek``, when
    no integer start position was recorded, or when seeking fails.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is None or not isinstance(prepared_request._body_position, integer_types):
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")

    try:
        body_seek(prepared_request._body_position)
    except (IOError, OSError):
        raise UnrewindableBodyError("An error occurred when rewinding request "
                                    "body for redirect.")
983
984