Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
keewenaw
GitHub Repository: keewenaw/ethereum-wallet-cracker
Path: blob/main/test/lib/python3.9/site-packages/setuptools/package_index.py
4798 views
1
"""PyPI and direct package downloading"""
2
import sys
3
import os
4
import re
5
import io
6
import shutil
7
import socket
8
import base64
9
import hashlib
10
import itertools
11
import warnings
12
import configparser
13
import html
14
import http.client
15
import urllib.parse
16
import urllib.request
17
import urllib.error
18
from functools import wraps
19
20
import setuptools
21
from pkg_resources import (
22
CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
23
Environment, find_distributions, safe_name, safe_version,
24
to_filename, Requirement, DEVELOP_DIST, EGG_DIST, parse_version,
25
)
26
from distutils import log
27
from distutils.errors import DistutilsError
28
from fnmatch import translate
29
from setuptools.wheel import Wheel
30
from setuptools.extern.more_itertools import unique_everseen
31
32
33
# Matches an "egg=<name>" URL fragment naming a source checkout.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
# Extracts the target of an href attribute (case-insensitive).
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# Matches legacy PyPI "md5" link markup so digests can be rewritten into
# "#md5=..." URL fragments (used by PackageIndex.process_index).
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
)
# Bound method: returns a match object when the string begins with a URL scheme.
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Recognized source-distribution archive extensions (order matters: the
# longer ".tar.gz"/".tar.bz2" must be tried before plain ".tar").
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]

# Default socket timeout, in seconds (see the socket_timeout decorator below).
_SOCKET_TIMEOUT = 15

# User-Agent header template, filled with the running setuptools and
# Python major.minor versions.
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
user_agent = _tmpl.format(
    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)
52
53
54
def parse_requirement_arg(spec):
    """Convert a requirement string into a ``Requirement`` object.

    Raises DistutilsError (chained from the parse failure) when *spec* is
    not a valid requirement specifier.
    """
    try:
        req = Requirement.parse(spec)
    except ValueError as err:
        msg = "Not a URL, existing file, or requirement spec: %r" % (spec,)
        raise DistutilsError(msg) from err
    return req
61
62
63
def parse_bdist_wininst(name):
    """Return (base, pyversion, platform) for a bdist_wininst ``.exe`` name.

    All three values are None when *name* does not look like a
    bdist_wininst installer filename.
    """
    lowered = name.lower()
    base = py_ver = plat = None

    if lowered.endswith('.exe'):
        if lowered.endswith('.win32.exe'):
            base, plat = name[:-10], 'win32'
        elif lowered[-16:].startswith('.win32-py'):
            # e.g. "pkg-1.0.win32-py2.7.exe": version is the "2.7" part
            base, py_ver, plat = name[:-16], name[-7:-4], 'win32'
        elif lowered.endswith('.win-amd64.exe'):
            base, plat = name[:-14], 'win-amd64'
        elif lowered[-20:].startswith('.win-amd64-py'):
            base, py_ver, plat = name[:-20], name[-7:-4], 'win-amd64'

    return base, py_ver, plat
85
86
87
def egg_info_for_url(url):
    """Split *url* into (unquoted basename, fragment)."""
    parsed = urllib.parse.urlparse(url)
    fragment = parsed.fragment
    segments = parsed.path.split('/')
    base = urllib.parse.unquote(segments[-1])
    # SourceForge download links end with a literal "download" segment;
    # the real filename is the segment before it.  # XXX Yuck
    if parsed.netloc == 'sourceforge.net' and base == 'download':
        base = urllib.parse.unquote(segments[-2])
    # A '#' surviving in the basename carries its own fragment.
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
96
97
98
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    yield from distros_for_location(url, base, metadata)
    if not fragment:
        return
    match = EGG_FRAGMENT.match(fragment)
    if match:
        # "#egg=name-version" names a source checkout explicitly
        yield from interpret_distro_name(
            url, match.group(1), metadata, precedence=CHECKOUT_DIST
        )
110
111
112
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.whl') and '-' in basename:
        wheel = Wheel(basename)
        if not wheel.is_compatible():
            # wheel tags don't match this interpreter/platform: no candidates
            return []
        return [Distribution(
            location=location,
            project_name=wheel.project_name,
            version=wheel.version,
            # Increase priority over eggs.
            precedence=EGG_DIST + 1,
        )]
    if basename.endswith('.exe'):
        # possibly a bdist_wininst installer
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )
    # Try source distro extensions (.zip, .tgz, etc.)
    #
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            basename = basename[:-len(ext)]
            return interpret_distro_name(location, basename, metadata)
    return []  # no extension matched
143
144
145
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
150
151
152
def interpret_distro_name(
        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
        platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Generate alternative interpretations of a source distro name
    # Because some packages are ambiguous as to name/versions split
    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
    # the spurious interpretations should be ignored, because in the event
    # there's also an "adns" package, the spurious "python-1.1.0" version will
    # compare lower than any numeric version number, and is therefore unlikely
    # to match a request for it. It's still a potential problem, though, and
    # in the long run PyPI and the distutils should go for "safe" names and
    # versions in distribution archive names (sdist and bdist).

    parts = basename.split('-')
    # A "pyX.Y" component after the first two parts marks a bdist_dumb
    # archive, which carries no usable source interpretation.
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    # Yield every possible name/version split point.
    for p in range(1, len(parts) + 1):
        yield Distribution(
            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
            py_version=py_version, precedence=precedence,
            platform=platform
        )
185
186
187
def unique_values(func):
    """Wrap *func* so its returned iterable never yields duplicates,
    preserving first-seen order.
    """

    @wraps(func)
    def deduplicated(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return deduplicated
198
199
200
REL = re.compile(r"""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
201
# this line is here to fix emacs' cruddy broken syntax highlighting
202
203
204
@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    # Links declared via rel= attributes anywhere in the page.
    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))

    # Legacy PyPI table rows: the first href after these headings is the link.
    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
221
222
223
class ContentChecker:
    """Null object defining the interface for download content validation."""

    def feed(self, block):
        """Accept a block of downloaded data; the null checker ignores it."""

    def is_valid(self):
        """Report whether the content passed validation; always True here."""
        return True

    def report(self, reporter, template):
        """Invoke *reporter* with checker details; a no-op for this class."""
246
247
248
class HashChecker(ContentChecker):
    """Validate downloaded content against an ``algo=hexdigest`` fragment."""

    # Recognizes "<algorithm>=<hex digest>" in a URL fragment.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        """Construct a (possibly null) ContentChecker from a URL."""
        fragment = urllib.parse.urlparse(url)[-1]
        if fragment:
            match = cls.pattern.search(fragment)
            if match:
                return cls(**match.groupdict())
        return ContentChecker()

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        return reporter(template % self.hash_name)
279
280
281
class PackageIndex(Environment):
282
"""A distribution index that scans web pages for download URLs"""
283
284
    def __init__(
            self, index_url="https://pypi.org/simple/", hosts=('*',),
            ca_bundle=None, verify_ssl=True, *args, **kw
    ):
        # ca_bundle and verify_ssl are accepted but not referenced here;
        # presumably kept for backward compatibility -- TODO confirm.
        super().__init__(*args, **kw)
        # "/"[:flag] is "/" when flag is True (1) and "" when False (0),
        # so a trailing slash is appended only if one is missing.
        self.index_url = index_url + "/" [:not index_url.endswith('/')]
        self.scanned_urls = {}   # URLs already examined (any outcome)
        self.fetched_urls = {}   # URLs whose pages were actually retrieved
        self.package_pages = {}  # project key -> {page url: True}
        # Host allow-list: each glob in `hosts` becomes a regex alternative.
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []        # find-links URLs deferred until prescan()
        self.opener = urllib.request.urlopen
296
297
def add(self, dist):
298
# ignore invalid versions
299
try:
300
parse_version(dist.version)
301
except Exception:
302
return
303
return super().add(dist)
304
305
    # FIXME: 'PackageIndex.process_url' is too complex (14)
    def process_url(self, url, retrieve=False):  # noqa: C901
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        # No scheme means a local filename/directory.
        if not URL_SCHEME(url):
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        # If the URL itself names distributions, or we're not retrieving,
        # there is no need to fetch and parse the page.
        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
        f = self.open_url(url, tmpl % url)
        if f is None:
            return
        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
            self.info("Authentication error: %s" % f.msg)
        # Mark the final (possibly redirected) URL as fetched too.
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # In Python 3 and got bytes but want str.
            if isinstance(f, urllib.error.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # Recursively process every link found on the page.
        for match in HREF.finditer(page):
            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        # Index pages (other than 404s) also feed the package-page index.
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)
358
359
def process_filename(self, fn, nested=False):
360
# process filenames or directories
361
if not os.path.exists(fn):
362
self.warn("Not found: %s", fn)
363
return
364
365
if os.path.isdir(fn) and not nested:
366
path = os.path.realpath(fn)
367
for item in os.listdir(path):
368
self.process_filename(os.path.join(path, item), True)
369
370
dists = distros_for_filename(fn)
371
if dists:
372
self.debug("Found: %s", fn)
373
list(map(self.add, dists))
374
375
def url_ok(self, url, fatal=False):
376
s = URL_SCHEME(url)
377
is_file = s and s.group(1).lower() == 'file'
378
if is_file or self.allows(urllib.parse.urlparse(url)[1]):
379
return True
380
msg = (
381
"\nNote: Bypassing %s (disallowed host; see "
382
"http://bit.ly/2hrImnY for details).\n")
383
if fatal:
384
raise DistutilsError(msg % url)
385
else:
386
self.warn(msg, url)
387
388
def scan_egg_links(self, search_path):
389
dirs = filter(os.path.isdir, search_path)
390
egg_links = (
391
(path, entry)
392
for path in dirs
393
for entry in os.listdir(path)
394
if entry.endswith('.egg-link')
395
)
396
list(itertools.starmap(self.scan_egg_link, egg_links))
397
398
def scan_egg_link(self, path, entry):
399
with open(os.path.join(path, entry)) as raw_lines:
400
# filter non-empty lines
401
lines = list(filter(None, map(str.strip, raw_lines)))
402
403
if len(lines) != 2:
404
# format is not recognized; punt
405
return
406
407
egg_path, setup_path = lines
408
409
for dist in find_distributions(os.path.join(path, egg_path)):
410
dist.location = os.path.join(path, *lines)
411
dist.precedence = SOURCE_DIST
412
self.add(dist)
413
414
def _scan(self, link):
415
# Process a URL to see if it's for a package page
416
NO_MATCH_SENTINEL = None, None
417
if not link.startswith(self.index_url):
418
return NO_MATCH_SENTINEL
419
420
parts = list(map(
421
urllib.parse.unquote, link[len(self.index_url):].split('/')
422
))
423
if len(parts) != 2 or '#' in parts[1]:
424
return NO_MATCH_SENTINEL
425
426
# it's a package page, sanitize and index it
427
pkg = safe_name(parts[0])
428
ver = safe_version(parts[1])
429
self.package_pages.setdefault(pkg.lower(), {})[link] = True
430
return to_filename(pkg), to_filename(ver)
431
432
    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = self._scan(url)  # ensure this page is in the page index
        if not pkg:
            return ""  # no sense double-scanning non-package pages

        # process individual package page
        for new_url in find_external_links(url, page):
            # Process the found URL
            base, frag = egg_info_for_url(new_url)
            # Bare .py links need an explicit "#egg" tag to be identifiable.
            if base.endswith('.py') and not frag:
                if ver:
                    new_url += '#egg=%s-%s' % (pkg, ver)
                else:
                    self.need_version_info(url)
            self.scan_url(new_url)

        # Rewrite legacy "md5" links into "#md5=digest" URL fragments.
        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
        )
460
461
def need_version_info(self, url):
462
self.scan_all(
463
"Page at %s links to .py file(s) without version info; an index "
464
"scan is required.", url
465
)
466
467
def scan_all(self, msg=None, *args):
468
if self.index_url not in self.fetched_urls:
469
if msg:
470
self.warn(msg, *args)
471
self.info(
472
"Scanning index of all packages (this may take a while)"
473
)
474
self.scan_url(self.index_url)
475
476
def find_packages(self, requirement):
477
self.scan_url(self.index_url + requirement.unsafe_name + '/')
478
479
if not self.package_pages.get(requirement.key):
480
# Fall back to safe version of the name
481
self.scan_url(self.index_url + requirement.project_name + '/')
482
483
if not self.package_pages.get(requirement.key):
484
# We couldn't find the target package, so search the index page too
485
self.not_found_in_index(requirement)
486
487
for url in list(self.package_pages.get(requirement.key, ())):
488
# scan each page that might be related to the desired package
489
self.scan_url(url)
490
491
def obtain(self, requirement, installer=None):
492
self.prescan()
493
self.find_packages(requirement)
494
for dist in self[requirement.key]:
495
if dist in requirement:
496
return dist
497
self.debug("%s does not match %s", requirement, dist)
498
return super(PackageIndex, self).obtain(requirement, installer)
499
500
def check_hash(self, checker, filename, tfp):
501
"""
502
checker is a ContentChecker
503
"""
504
checker.report(
505
self.debug,
506
"Validating %%s checksum for %s" % filename)
507
if not checker.is_valid():
508
tfp.close()
509
os.unlink(filename)
510
raise DistutilsError(
511
"%s validation failed for %s; "
512
"possible download problem?"
513
% (checker.hash.name, os.path.basename(filename))
514
)
515
516
def add_find_links(self, urls):
517
"""Add `urls` to the list that will be prescanned for searches"""
518
for url in urls:
519
if (
520
self.to_scan is None # if we have already "gone online"
521
or not URL_SCHEME(url) # or it's a local file/directory
522
or url.startswith('file:')
523
or list(distros_for_url(url)) # or a direct package link
524
):
525
# then go ahead and process it now
526
self.scan_url(url)
527
else:
528
# otherwise, defer retrieval till later
529
self.to_scan.append(url)
530
531
def prescan(self):
532
"""Scan urls scheduled for prescanning (e.g. --find-links)"""
533
if self.to_scan:
534
list(map(self.scan_url, self.to_scan))
535
self.to_scan = None # from now on, go ahead and process immediately
536
537
def not_found_in_index(self, requirement):
538
if self[requirement.key]: # we've seen at least one distro
539
meth, msg = self.info, "Couldn't retrieve index page for %r"
540
else: # no distros seen for this name, might be misspelled
541
meth, msg = (
542
self.warn,
543
"Couldn't find index page for %r (maybe misspelled?)")
544
meth(msg, requirement.unsafe_name)
545
self.scan_all()
546
547
    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object). If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged. If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned. Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    # plain .py file: synthesize a setup.py next to it
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                # treat the string as a requirement spec and fall through
                spec = parse_requirement_arg(spec)
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
580
581
    def fetch_distribution(  # noqa: C901 # is too complex (14) # FIXME
            self, requirement, tmpdir, force_scan=False, source=False,
            develop_ok=False, local_index=None):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}  # develop/system eggs already warned about
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn(
                            "Skipping development or system egg: %s", dist,
                        )
                        skipped[dist] = 1
                    continue

                test = (
                    dist in req
                    and (dist.precedence <= SOURCE_DIST or not source)
                )
                if test:
                    # candidate matches: download and verify it materialized
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    if os.path.exists(dist.download_location):
                        return dist

        # Escalating search: forced scan, then local index, then prescan of
        # find-links, then a full package search as a last resort.
        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=dist.download_location)
656
657
def fetch(self, requirement, tmpdir, force_scan=False, source=False):
658
"""Obtain a file suitable for fulfilling `requirement`
659
660
DEPRECATED; use the ``fetch_distribution()`` method now instead. For
661
backward compatibility, this routine is identical but returns the
662
``location`` of the downloaded distribution instead of a distribution
663
object.
664
"""
665
dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
666
if dist is not None:
667
return dist.location
668
return None
669
670
    def gen_setup(self, filename, fragment, tmpdir):
        """Create a trivial setup.py for a downloaded .py module.

        The ``#egg=name-version`` fragment must yield exactly one
        unambiguous name/version interpretation; otherwise raise.
        """
        match = EGG_FRAGMENT.match(fragment)
        # All versioned interpretations of the fragment (empty when no match).
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists) == 1:  # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
                    shutil.copy2(filename, dst)
                    filename = dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
                file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name, dists[0].version,
                        os.path.splitext(basename)[0]
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )
709
710
    # Read/write chunk size (bytes) used when streaming downloads.
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        """Stream *url* into *filename*, validating any URL hash fragment.

        Returns the response headers; raises DistutilsError on HTTP errors
        or checksum mismatch.
        """
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            # open_url returns (not raises) HTTPError objects; reject them.
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                self.check_hash(checker, filename, tfp)
            return headers
        finally:
            if fp:
                fp.close()
747
748
def reporthook(self, url, filename, blocknum, blksize, size):
749
pass # no-op
750
751
    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901 # is too complex (12)
        """Open *url*, mapping transport failures to warnings or errors.

        With *warning* set, failures are logged (returning None implicitly);
        otherwise they are re-raised as DistutilsError. HTTP error responses
        are returned as the HTTPError object itself, not raised.
        """
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg)) from v
        except urllib.error.HTTPError as v:
            # hand the error response back to the caller for inspection
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v.reason)) from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' %
                    (url, v.line)
                ) from v
        except (http.client.HTTPException, socket.error) as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v)) from v
786
787
    def _download_url(self, scheme, url, tmpdir):
        """Download *url* into *tmpdir*, dispatching on its scheme."""
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            # neutralize ".." and backslashes so the name stays inside tmpdir
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"  # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]  # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme == 'file':
            return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
        else:
            self.url_ok(url, True)  # raises error if not allowed
            return self._attempt_download(url, filename)
815
816
def scan_url(self, url):
817
self.process_url(url, True)
818
819
def _attempt_download(self, url, filename):
820
headers = self._download_to(url, filename)
821
if 'html' in headers.get('content-type', '').lower():
822
return self._download_html(url, headers, filename)
823
else:
824
return filename
825
826
    def _download_html(self, url, headers, filename):
        """Handle an unexpected HTML download result.

        A Subversion index page triggers an svn checkout; anything else
        is an error (the file is removed either way).
        """
        file = open(filename)
        # Inspect only the first non-blank line.
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break  # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at " + url)
840
841
    def _download_svn(self, url, filename):
        """Check out an svn URL into *filename* (deprecated).

        NOTE(review): the URL is interpolated into an os.system() shell
        command without quoting -- a shell-injection risk for untrusted
        URLs; flagged, behavior preserved.
        """
        warnings.warn("SVN download support is deprecated", UserWarning)
        url = url.split('#', 1)[0]  # remove any fragment for svn's sake
        creds = ''
        # Extract embedded user:password credentials from svn: URLs and
        # pass them via --username/--password options instead.
        if url.lower().startswith('svn:') and '@' in url:
            scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
            if not netloc and path.startswith('//') and '/' in path[2:]:
                netloc, path = path[2:].split('/', 1)
                auth, host = _splituser(netloc)
                if auth:
                    if ':' in auth:
                        user, pw = auth.split(':', 1)
                        creds = " --username=%s --password=%s" % (user, pw)
                    else:
                        creds = " --username=" + auth
                    netloc = host
                    parts = scheme, netloc, url, p, q, f
                    url = urllib.parse.urlunparse(parts)
        self.info("Doing subversion checkout from %s to %s", url, filename)
        os.system("svn checkout%s -q %s %s" % (creds, url, filename))
        return filename
862
863
@staticmethod
864
def _vcs_split_rev_from_url(url, pop_prefix=False):
865
scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)
866
867
scheme = scheme.split('+', 1)[-1]
868
869
# Some fragment identification fails
870
path = path.split('#', 1)[0]
871
872
rev = None
873
if '@' in path:
874
path, rev = path.rsplit('@', 1)
875
876
# Also, discard fragment
877
url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))
878
879
return url, rev
880
881
    def _download_git(self, url, filename):
        """Clone a git URL into *filename*, checking out any "@rev" suffix.

        NOTE(review): url/rev are interpolated into os.system() shell
        commands without quoting -- shell-injection risk for untrusted
        URLs; flagged, behavior preserved.
        """
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        os.system("git clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Checking out %s", rev)
            os.system("git -C %s checkout --quiet %s" % (
                filename,
                rev,
            ))

        return filename
896
897
    def _download_hg(self, url, filename):
        """Clone a mercurial URL into *filename*, updating to any "@rev".

        NOTE(review): url/rev are interpolated into os.system() shell
        commands without quoting -- shell-injection risk for untrusted
        URLs; flagged, behavior preserved.
        """
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        os.system("hg clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Updating to %s", rev)
            os.system("hg --cwd %s up -C -r %s -q" % (
                filename,
                rev,
            ))

        return filename
912
913
    # Thin wrappers over distutils logging; subclasses may override to
    # redirect output.
    def debug(self, msg, *args):
        log.debug(msg, *args)

    def info(self, msg, *args):
        log.info(msg, *args)

    def warn(self, msg, *args):
        log.warn(msg, *args)
921
922
923
# This pattern matches a character entity reference (a decimal numeric
924
# references, a hexadecimal numeric reference, or a named reference).
925
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
926
927
928
def decode_entity(match):
    """Unescape the HTML entity reference captured by *match*."""
    return html.unescape(match.group(0))
931
932
933
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    # Replace each entity reference via decode_entity (html.unescape).
    return entity_sub(decode_entity, text)
943
944
945
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable with the default socket
    timeout set to *timeout* seconds, restoring the previous default
    afterwards (even on error).
    """
    def _socket_timeout(func):
        # Fix: preserve the wrapped function's metadata (__name__, __doc__),
        # which the previous wrapper discarded; `wraps` is already imported
        # at the top of this module.
        @wraps(func)
        def wrapper(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old_timeout)

        return wrapper

    return _socket_timeout
958
959
960
def _encode_auth(auth):
    """
    Encode auth from a URL suitable for an HTTP header.
    >>> str(_encode_auth('username%3Apassword'))
    'dXNlcm5hbWU6cGFzc3dvcmQ='

    Long auth strings should not cause a newline to be inserted.
    >>> long_auth = 'username:' + 'password'*10
    >>> chr(10) in str(_encode_auth(long_auth))
    False
    """
    unquoted = urllib.parse.unquote(auth)
    encoded = base64.b64encode(unquoted.encode()).decode()
    # guard against any newline the base64 layer might have inserted
    return encoded.replace('\n', '')
979
980
981
class Credential:
    """A username/password pair. Use like a namedtuple."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        return iter((self.username, self.password))

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)
996
997
998
class PyPIConfig(configparser.RawConfigParser):
    """Read repository credentials from the user's ``.pypirc`` file."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        # Map each configured repository URL to its Credential.
        sections_with_repositories = [
            section for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))

    def _get_repo_cred(self, section):
        # Return (repository url, Credential) for a config section.
        repo = self.get(section, 'repository').strip()
        return repo, Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
1034
1035
1036
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication.

    Credentials come from the URL's userinfo component or, failing that,
    from a matching repository entry in ~/.pypirc. Returns the file-like
    response from *opener*; on success its ``url`` attribute is rewritten
    to re-embed the credentials so links found on the page keep working.
    """

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, address = _splituser(netloc)
    else:
        # Bugfix: ``address`` was previously left unbound on this path,
        # so a .pypirc credential matching a non-http(s) URL raised
        # NameError at the urlunparse below.
        auth, address = None, netloc

    if not auth:
        # No credentials in the URL itself; fall back to ~/.pypirc.
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        # Strip the userinfo from the URL actually requested; the
        # credentials travel in the Authorization header instead.
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
1080
1081
1082
# copy of urllib.parse._splituser from Python 3.8
1083
def _splituser(host):
1084
"""splituser('user[:passwd]@host[:port]')
1085
--> 'user[:passwd]', 'host[:port]'."""
1086
user, delim, host = host.rpartition('@')
1087
return (user if delim else None), host
1088
1089
1090
# adding a timeout to avoid freezing package_index
1091
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1092
1093
1094
def fix_sf_url(url):
    """Return *url* unchanged; kept only for backward compatibility."""
    return url
1096
1097
1098
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        # Plain file: let urllib serve it directly.
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                # An existing index.html wins outright: serve its
                # contents and skip the generated listing (the ``break``
                # bypasses the for/else branch below).
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                # Mark subdirectories with a trailing slash in the link.
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            # Loop finished without finding index.html: synthesize a
            # minimal HTML listing of the directory entries.
            tmpl = (
                "<html><head><title>{url}</title>"
                "</head><body>{files}</body></html>")
            body = tmpl.format(url=url, files='\n'.join(files))
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    # NOTE: HTTPError is used here as a file-like response object (even
    # for status 200), not raised — callers read it like an open page.
    return urllib.error.HTTPError(url, status, message, headers, body_stream)
1127
1128