CoCalc -- utils.py

GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/requests/utils.py
⁸¹¹ views
1
# -*- coding: utf-8 -*-
2

3
"""
4
requests.utils
5
~~~~~~~~~~~~~~
6

7
This module provides utility functions that are used within Requests
8
that are also useful for external consumption.
9
"""
10

11
import codecs
12
import contextlib
13
import io
14
import os
15
import re
16
import socket
17
import struct
18
import sys
19
import tempfile
20
import warnings
21
import zipfile
22
from collections import OrderedDict
23

24
from .__version__ import __version__
25
from . import certs
26
# to_native_string is unused here, but imported here for backwards compatibility
27
from ._internal_utils import to_native_string
28
from .compat import parse_http_list as _parse_list_header
29
from .compat import (
30
    quote, urlparse, bytes, str, unquote, getproxies,
31
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
32
    proxy_bypass_environment, getproxies_environment, Mapping)
33
from .cookies import cookiejar_from_dict
34
from .structures import CaseInsensitiveDict
35
from .exceptions import (
36
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
37

38
# File names tried, in order, inside the user's home directory when looking
# for a netrc file.
NETRC_FILES = ('.netrc', '_netrc')

# CA bundle location as reported by ``certs.where()``.
DEFAULT_CA_BUNDLE_PATH = certs.where()

# Port number paired with each of the URL schemes listed here.
DEFAULT_PORTS = {'http': 80, 'https': 443}
43

44

45
if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        """Return True if ``host`` matches an entry of the ``ProxyOverride``
        value in the current user's Internet Settings registry key.

        Returns False when the winreg module cannot be imported, when the
        registry values cannot be read or ``ProxyEnable`` cannot be
        converted to an integer, when the proxy is disabled or the override
        list is empty, or when no entry matches.

        :param host: host name to test against the override list.
        :rtype: bool
        """
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except (OSError, ValueError):
            # OSError: key/value missing or unreadable.
            # ValueError: a REG_SZ ProxyEnable that int() cannot parse.
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' covers every host name without a dot
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
97

98

99
def dict_to_sequence(d):
    """Return ``d.items()`` when ``d`` has an ``items`` attribute; otherwise
    return ``d`` itself unchanged."""
    return d.items() if hasattr(d, 'items') else d
106

107

108
def super_len(o):
    """Return the length of ``o`` that remains after its current position.

    The total length is taken from the first of these that applies:
    ``len(o)``, an ``o.len`` attribute, or the size ``os.fstat`` reports for
    ``o.fileno()``.  Objects that offer none of those but can ``tell`` and
    ``seek`` are measured by seeking to the end and back.  The value of
    ``o.tell()`` (when available) is then subtracted, so a partially read
    file-like object reports only what is left.  Unknown lengths count as 0
    and the result is never negative.

    A ``FileModeWarning`` is issued when the size came from ``fstat`` and
    ``'b'`` is not in ``o.mode``.

    :rtype: int
    """
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            # e.g. in-memory streams: fall through to the seek/tell path
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. Treating the
            # position as the end makes the remaining length zero, so
            # requests chunks the body instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no useable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
167

168

169
def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc.

    The first existing file among ``NETRC_FILES`` in the user's home
    directory is parsed and queried for the URL's host.

    :param url: URL whose host is looked up in the netrc file.
    :param raise_errors: when true, re-raise netrc parse errors and I/O
        errors instead of silently skipping netrc auth.
    :return: ``(login, password)`` when an entry is found, otherwise None.
        If the entry's login is empty, its account field is used instead.
    """

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        # Use the parsed hostname rather than splitting netloc on ':'.
        # netloc still contains any userinfo, so for a URL such as
        # 'http://example.com:@evil.com/' a naive split yields
        # 'example.com' and would hand that host's credentials to evil.com.
        host = urlparse(url).hostname

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass
218

219

220
def guess_filename(obj):
    """Tries to guess the filename of the given object.

    Returns the base name of ``obj.name`` when that attribute is a non-empty
    string that neither starts with ``<`` nor ends with ``>``; otherwise
    returns None.
    """
    name = getattr(obj, 'name', None)
    if not name or not isinstance(name, basestring):
        return None
    if name[0] == '<' or name[-1] == '>':
        # angle-bracketed names such as '<stdin>' are not file names
        return None
    return os.path.basename(name)
226

227

228
def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.

    :param path: filesystem path, possibly pointing inside a zip archive.
    :return: ``path`` itself, or the path of the member extracted below
        ``tempfile.gettempdir()``.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        if not prefix:
            # os.path.split() made no progress (archive is a root that does
            # not exist); without this check the loop would never end.
            break
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    # the context manager closes the archive handle on every exit path
    with zipfile.ZipFile(archive) as zip_file:
        if member not in zip_file.namelist():
            return path

        # we have a valid zip archive and a valid member of that archive
        tmp = tempfile.gettempdir()
        extracted_path = os.path.join(tmp, *member.split('/'))
        if not os.path.exists(extracted_path):
            extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path
258

259

260
def from_key_val_list(value):
    """Convert ``value`` into an OrderedDict.

    ``None`` is passed through as ``None``.  Strings, bytes, booleans and
    integers are rejected with ``ValueError``; anything else is handed to
    the ``OrderedDict`` constructor, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)
285

286

287
def to_key_val_list(value):
    """Convert ``value`` into a list of key/value tuples.

    ``None`` is passed through as ``None``.  Strings, bytes, booleans and
    integers are rejected with ``ValueError``.  Mappings contribute their
    ``items()``; any other iterable is materialised with ``list()``, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    pairs = value.items() if isinstance(value, Mapping) else value
    return list(pairs)
314

315

316
# From mitsuhiko/werkzeug (used with permission).
317
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    parsed = []
    for element in _parse_list_header(value):
        # an element wrapped in double quotes is unquoted before it is kept
        if element[:1] == element[-1:] == '"':
            element = unquote_header_value(element[1:-1])
        parsed.append(element)
    return parsed
346

347

348
# From mitsuhiko/werkzeug (used with permission).
349
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        # split on the first '=' only; the value may itself contain '='
        name, item_value = item.split('=', 1)
        if item_value[:1] == item_value[-1:] == '"':
            item_value = unquote_header_value(item_value[1:-1])
        result[name] = item_value
    return result
381

382

383
# From mitsuhiko/werkzeug (used with permission).
384
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value.  (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :param is_filename: when true, a value whose unquoted form starts with
        two backslashes (a UNC path) is returned without un-escaping.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well.  IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes.  Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly.  See #458.
        looks_like_unc = value[:2] == '\\\\'
        if not (is_filename and looks_like_unc):
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value
407

408

409
def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    Each cookie contributes ``cookie.name -> cookie.value``; when several
    cookies share a name, the one iterated last wins.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """
    return {cookie.name: cookie.value for cookie in cj}
422

423

424
def add_dict_to_cookiejar(cj, cookie_dict):
    """Insert the entries of a key/value dictionary into a CookieJar.

    Delegates to ``cookiejar_from_dict(cookie_dict, cj)`` and returns its
    result.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """
    return cookiejar_from_dict(cookie_dict, cj)
433

434

435
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    Three kinds of declaration are searched for, and their matches are
    returned in this order: ``<meta ... charset=...>`` attributes,
    ``<meta ... content=...charset=...>`` pragmas, and an
    ``<?xml ... encoding=...>`` declaration at the very start.

    Emits a ``DeprecationWarning`` on every call.

    :param content: text (not bytes) to extract encodings from; the
        patterns used are text patterns.
    :rtype: list
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    found = charset_re.findall(content)
    found += pragma_re.findall(content)
    found += xml_re.findall(content)
    return found
453

454

455
def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    Parameter names are lower-cased.  A parameter written without ``=`` is
    stored with the value ``True``; otherwise key and value are stripped of
    surrounding quotes and spaces.

    :param header: string
    :return: tuple containing content type and dictionary of
         parameters
    """

    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if not param:
            continue
        key, separator, raw_value = param.partition('=')
        if separator:
            key = key.strip(items_to_strip)
            value = raw_value.strip(items_to_strip)
        else:
            # bare flag such as "; boundary"
            key, value = param, True
        params_dict[key.lower()] = value
    return content_type, params_dict


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    The ``charset`` parameter of the ``content-type`` header wins when it
    carries a value.  Otherwise ``'ISO-8859-1'`` is returned for content
    types containing ``text``, and None in every other case (including a
    missing or empty header).

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    charset = params.get('charset')
    # A valueless "charset" parameter is stored as True by the parser and
    # has no .strip(); treat it as if no charset had been given.
    if charset is not None and charset is not True:
        return charset.strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'
499

500

501
def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator.

    When ``r.encoding`` is None the items are yielded untouched.  Otherwise
    each chunk goes through an incremental decoder for that encoding
    (undecodable input is replaced, not raised) and only non-empty decoded
    pieces are yielded, followed by whatever the decoder flushes at the end.

    :param iterator: iterable of byte chunks.
    :param r: object exposing an ``encoding`` attribute.
    """

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        text = decoder.decode(chunk)
        if text:
            yield text
    # flush bytes still buffered from an incomplete trailing sequence
    tail = decoder.decode(b'', final=True)
    if tail:
        yield tail
517

518

519
def iter_slices(string, slice_length):
    """Iterate over slices of a string.

    Yields consecutive pieces of at most ``slice_length`` items.  A
    ``slice_length`` of None, zero or less means "the whole string in one
    slice".  Nothing is yielded for an empty string.
    """
    total = len(string)
    if slice_length is None or slice_length <= 0:
        slice_length = total
    start = 0
    while start < total:
        yield string[start:start + slice_length]
        start += slice_length
527

528

529
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type
    2. fall back and replace all unicode characters

    If no encoding can be derived from the headers, the second step fails
    with ``TypeError`` and ``r.content`` is returned as is.

    Emits a ``DeprecationWarning`` on every call.

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            # strict decoding failed; retry below with replacement
            pass

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content
563

564

565
# The unreserved URI characters (RFC 3986)
566
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :param uri: URI text to process.
    :raises InvalidURL: when a ``%`` is followed by two alphanumeric
        characters that are not valid hexadecimal.
    :rtype: str
    """
    parts = uri.split('%')
    # parts[0] precedes the first '%'; every later part began with one
    for i in range(1, len(parts)):
        segment = parts[i]
        h = segment[0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + segment[2:]
            else:
                parts[i] = '%' + segment
        else:
            # not a full escape sequence: put the '%' back untouched
            parts[i] = '%' + segment
    return ''.join(parts)
592

593

594
def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    If un-escaping fails with ``InvalidURL``, the URI is quoted as given
    with ``%`` no longer treated as safe, so stray percent signs get
    escaped too.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
614

615

616
def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :param ip: dotted IPv4 address.
    :param net: network in ``address/prefix-length`` form.
    :rtype: bool
    """
    netaddr, bits = net.split('/')
    # all three values are unpacked with the same format, so the bitwise
    # comparison below is consistent
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)
629

630

631
def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :param mask: prefix length as an integer.
    :rtype: str
    """
    # the low (32 - mask) bits are the host part; clear them
    host_bits = (1 << (32 - mask)) - 1
    bits = 0xffffffff ^ host_bits
    return socket.inet_ntoa(struct.pack('>I', bits))
640

641

642
def is_ipv4_address(string_ip):
    """Return True when ``socket.inet_aton`` accepts ``string_ip``.

    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    else:
        return True
651

652

653
def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    The value must contain exactly one ``/``, the part after it must be an
    integer from 1 to 32, and the part before it must be accepted by
    ``socket.inet_aton``.

    :rtype: bool
    """
    if string_network.count('/') != 1:
        return False

    address, mask_text = string_network.split('/')

    try:
        mask = int(mask_text)
    except ValueError:
        return False

    if mask < 1 or mask > 32:
        return False

    try:
        socket.inet_aton(address)
    except socket.error:
        return False

    return True
675

676

677
@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing"""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                # pop() instead of del: the managed body may already have
                # removed the variable, which must not raise KeyError here
                os.environ.pop(env_name, None)
            else:
                os.environ[env_name] = old_value
697

698

699
def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :param url: URL about to be requested.
    :param no_proxy: comma-separated no-proxy list, or None to read it from
        the ``no_proxy`` / ``NO_PROXY`` environment variables.
    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    def get_proxy(key):
        return os.environ.get(key) or os.environ.get(key.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If no_proxy ip was defined in plain IP notation instead of cidr notation &
                    # matches the IP of the index
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    # Expose the caller-supplied no_proxy value (if any) through the
    # environment while the platform's proxy_bypass() is consulted.
    with set_environ('no_proxy', no_proxy_arg):
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    return bool(bypass)
758

759

760
def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    The dict is empty when ``should_bypass_proxies`` says the URL must not
    go through a proxy; otherwise it is whatever ``getproxies()`` returns.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    return getproxies()
770

771

772
def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    Keys are tried from most to least specific: ``scheme://host``,
    ``scheme``, ``all://host``, ``all``.  For URLs without a host only
    ``scheme`` and then ``all`` are consulted.

    :param url: The url being for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    :return: the matching proxy URL, or None when nothing matches.
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            return proxies[proxy_key]

    return None
796

797

798
def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    The string has the form ``<name>/<__version__>``.

    :param name: product name placed before the slash.
    :rtype: str
    """
    return '%s/%s' % (name, __version__)
805

806

807
def default_headers():
    """
    Build a fresh set of default request headers: ``User-Agent`` (from
    :func:`default_user_agent`), ``Accept-Encoding``, ``Accept`` and
    ``Connection``.

    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })
817

818

819
def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    Each link becomes a dict with a ``url`` key plus one key per
    ``name=value`` parameter.  Parsing of a link's parameters stops at the
    first parameter that contains no ``=``.

    :param value: content of a Link header.
    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                # split on the first '=' only so that values which contain
                # '=' themselves (e.g. title="a=b") are kept intact
                key, param_value = param.split('=', 1)
            except ValueError:
                break

            link[key.strip(replace_chars)] = param_value.strip(replace_chars)

        links.append(link)

    return links
854

855

856
# Null bytes; no need to recreate these on each call to guess_json_utf
857
_null = b'\x00'
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """Guess the UTF encoding of JSON ``data`` from its first four bytes.

    A byte-order mark is recognised first; otherwise the number and
    position of null bytes decide.

    :param data: bytes to inspect.
    :return: codec name, or None when the null-byte pattern matches none of
        the expected layouts.
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
892

893

894
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :param url: URL text, with or without a scheme.
    :param new_scheme: scheme handed to ``urlparse`` as the default.
    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))
909

910

911
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password.

    Both components are percent-decoded.  ``('', '')`` is returned whenever
    decoding either component raises ``AttributeError`` or ``TypeError``,
    which is what happens when the URL carries no credentials.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        return (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        return ('', '')
925

926

927
# Moved outside of function to avoid recompile every call
928
# \Z (not $) anchors the end: $ also matches just before a trailing newline,
# which would let a value such as 'abc\n' through.
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*\\Z|^\\Z')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*\Z|^\Z')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    :raises InvalidHeader: when the value starts with whitespace, contains
        a carriage return or line feed anywhere (including at the very
        end), or is neither str nor bytes.
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        # pat.match() rejects anything that is not str/bytes
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))
951

952

953
def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    # keep only what follows the last '@', i.e. drop any userinfo
    host_part = netloc.rsplit('@', 1)[-1]

    # the fragment slot is deliberately left empty
    return urlunparse((scheme, host_part, path, params, query, ''))
968

969

970
def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.

    :param prepared_request: object whose ``body`` is seeked back to
        ``_body_position``.
    :raises UnrewindableBodyError: when the body has no ``seek``, when
        ``_body_position`` is not an integer, or when seeking fails with an
        I/O error.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    position = prepared_request._body_position

    if body_seek is None or not isinstance(position, integer_types):
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")

    try:
        body_seek(position)
    except (IOError, OSError):
        raise UnrewindableBodyError("An error occurred when rewinding request "
                                    "body for redirect.")
983

984
Product

Resources

Company