Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
keewenaw
GitHub Repository: keewenaw/ethereum-wallet-cracker
Path: blob/main/test/lib/python3.9/site-packages/setuptools/package_index.py
4798 views
1
"""PyPI and direct package downloading"""
2
import sys
3
import os
4
import re
5
import io
6
import shutil
7
import socket
8
import base64
9
import hashlib
10
import itertools
11
import warnings
12
import configparser
13
import html
14
import http.client
15
import urllib.parse
16
import urllib.request
17
import urllib.error
18
from functools import wraps
19
20
import setuptools
21
from pkg_resources import (
22
CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
23
Environment, find_distributions, safe_name, safe_version,
24
to_filename, Requirement, DEVELOP_DIST, EGG_DIST, parse_version,
25
)
26
from distutils import log
27
from distutils.errors import DistutilsError
28
from fnmatch import translate
29
from setuptools.wheel import Wheel
30
from setuptools.extern.more_itertools import unique_everseen
31
32
33
# Matches an "egg=<name>" URL fragment naming a source checkout.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
# Extracts the target of an href attribute (case-insensitive).
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# Matches legacy PyPI "md5" link markup so digests can be rewritten into
# "#md5=..." URL fragments (used by PackageIndex.process_index).
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
)
# Bound method: returns a match object when the string begins with a URL scheme.
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Recognized source-distribution archive extensions (order matters: the
# longer ".tar.gz"/".tar.bz2" must be tried before plain ".tar").
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]

# Default socket timeout, in seconds (see the socket_timeout decorator below).
_SOCKET_TIMEOUT = 15

# User-Agent header template, filled with the running setuptools and
# Python major.minor versions.
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
user_agent = _tmpl.format(
    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)
52
53
54
def parse_requirement_arg(spec):
    """Convert a requirement string into a ``Requirement`` object.

    Raises DistutilsError (chained from the parse failure) when *spec* is
    not a valid requirement specifier.
    """
    try:
        req = Requirement.parse(spec)
    except ValueError as err:
        msg = "Not a URL, existing file, or requirement spec: %r" % (spec,)
        raise DistutilsError(msg) from err
    return req
61
62
63
def parse_bdist_wininst(name):
    """Return (base, pyversion, platform) for a bdist_wininst ``.exe`` name.

    All three values are None when *name* does not look like a
    bdist_wininst installer filename.
    """
    lowered = name.lower()
    base = py_ver = plat = None

    if lowered.endswith('.exe'):
        if lowered.endswith('.win32.exe'):
            base, plat = name[:-10], 'win32'
        elif lowered[-16:].startswith('.win32-py'):
            # e.g. "pkg-1.0.win32-py2.7.exe": version is the "2.7" part
            base, py_ver, plat = name[:-16], name[-7:-4], 'win32'
        elif lowered.endswith('.win-amd64.exe'):
            base, plat = name[:-14], 'win-amd64'
        elif lowered[-20:].startswith('.win-amd64-py'):
            base, py_ver, plat = name[:-20], name[-7:-4], 'win-amd64'

    return base, py_ver, plat
85
86
87
def egg_info_for_url(url):
    """Split *url* into (unquoted basename, fragment)."""
    parsed = urllib.parse.urlparse(url)
    fragment = parsed.fragment
    segments = parsed.path.split('/')
    base = urllib.parse.unquote(segments[-1])
    # SourceForge download links end with a literal "download" segment;
    # the real filename is the segment before it.  # XXX Yuck
    if parsed.netloc == 'sourceforge.net' and base == 'download':
        base = urllib.parse.unquote(segments[-2])
    # A '#' surviving in the basename carries its own fragment.
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
96
97
98
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    yield from distros_for_location(url, base, metadata)
    if not fragment:
        return
    match = EGG_FRAGMENT.match(fragment)
    if match:
        # "#egg=name-version" names a source checkout explicitly
        yield from interpret_distro_name(
            url, match.group(1), metadata, precedence=CHECKOUT_DIST
        )
110
111
112
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.whl') and '-' in basename:
        wheel = Wheel(basename)
        if not wheel.is_compatible():
            # wheel tags don't match this interpreter/platform: no candidates
            return []
        return [Distribution(
            location=location,
            project_name=wheel.project_name,
            version=wheel.version,
            # Increase priority over eggs.
            precedence=EGG_DIST + 1,
        )]
    if basename.endswith('.exe'):
        # possibly a bdist_wininst installer
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )
    # Try source distro extensions (.zip, .tgz, etc.)
    #
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            basename = basename[:-len(ext)]
            return interpret_distro_name(location, basename, metadata)
    return []  # no extension matched
143
144
145
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
150
151
152
def interpret_distro_name(
        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
        platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Generate alternative interpretations of a source distro name
    # Because some packages are ambiguous as to name/versions split
    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
    # the spurious interpretations should be ignored, because in the event
    # there's also an "adns" package, the spurious "python-1.1.0" version will
    # compare lower than any numeric version number, and is therefore unlikely
    # to match a request for it. It's still a potential problem, though, and
    # in the long run PyPI and the distutils should go for "safe" names and
    # versions in distribution archive names (sdist and bdist).

    parts = basename.split('-')
    # A "pyX.Y" component after the first two parts marks a bdist_dumb
    # archive, which carries no usable source interpretation.
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    # Yield every possible name/version split point.
    for p in range(1, len(parts) + 1):
        yield Distribution(
            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
            py_version=py_version, precedence=precedence,
            platform=platform
        )
185
186
187
def unique_values(func):
    """Wrap *func* so its returned iterable never yields duplicates,
    preserving first-seen order.
    """

    @wraps(func)
    def deduplicated(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return deduplicated
198
199
200
REL = re.compile(r"""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
201
# this line is here to fix emacs' cruddy broken syntax highlighting
202
203
204
@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    # Links declared via rel= attributes anywhere in the page.
    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))

    # Legacy PyPI table rows: the first href after these headings is the link.
    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
221
222
223
class ContentChecker:
    """Null object defining the interface for download content validation."""

    def feed(self, block):
        """Accept a block of downloaded data; the null checker ignores it."""

    def is_valid(self):
        """Report whether the content passed validation; always True here."""
        return True

    def report(self, reporter, template):
        """Invoke *reporter* with checker details; a no-op for this class."""
246
247
248
class HashChecker(ContentChecker):
    """Validate downloaded content against an ``algo=hexdigest`` fragment."""

    # Recognizes "<algorithm>=<hex digest>" in a URL fragment.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        """Construct a (possibly null) ContentChecker from a URL."""
        fragment = urllib.parse.urlparse(url)[-1]
        if fragment:
            match = cls.pattern.search(fragment)
            if match:
                return cls(**match.groupdict())
        return ContentChecker()

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        return reporter(template % self.hash_name)
279
280
281
class PackageIndex(Environment):
282
"""A distribution index that scans web pages for download URLs"""
283
284
    def __init__(
            self, index_url="https://pypi.org/simple/", hosts=('*',),
            ca_bundle=None, verify_ssl=True, *args, **kw
    ):
        # ca_bundle and verify_ssl are accepted but not referenced here;
        # presumably kept for backward compatibility -- TODO confirm.
        super().__init__(*args, **kw)
        # "/"[:flag] is "/" when flag is True (1) and "" when False (0),
        # so a trailing slash is appended only if one is missing.
        self.index_url = index_url + "/" [:not index_url.endswith('/')]
        self.scanned_urls = {}   # URLs already examined (any outcome)
        self.fetched_urls = {}   # URLs whose pages were actually retrieved
        self.package_pages = {}  # project key -> {page url: True}
        # Host allow-list: each glob in `hosts` becomes a regex alternative.
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []        # find-links URLs deferred until prescan()
        self.opener = urllib.request.urlopen
296
297
def add(self, dist):
298
# ignore invalid versions
299
try:
300
parse_version(dist.version)
301
except Exception:
302
return
303
return super().add(dist)
304
305
    # FIXME: 'PackageIndex.process_url' is too complex (14)
    def process_url(self, url, retrieve=False):  # noqa: C901
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        # No scheme means a local filename/directory.
        if not URL_SCHEME(url):
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        # If the URL itself names distributions, or we're not retrieving,
        # there is no need to fetch and parse the page.
        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
        f = self.open_url(url, tmpl % url)
        if f is None:
            return
        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
            self.info("Authentication error: %s" % f.msg)
        # Mark the final (possibly redirected) URL as fetched too.
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # In Python 3 and got bytes but want str.
            if isinstance(f, urllib.error.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # Recursively process every link found on the page.
        for match in HREF.finditer(page):
            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        # Index pages (other than 404s) also feed the package-page index.
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)
358
359
def process_filename(self, fn, nested=False):
360
# process filenames or directories
361
if not os.path.exists(fn):
362
self.warn("Not found: %s", fn)
363
return
364
365
if os.path.isdir(fn) and not nested:
366
path = os.path.realpath(fn)
367
for item in os.listdir(path):
368
self.process_filename(os.path.join(path, item), True)
369
370
dists = distros_for_filename(fn)
371
if dists:
372
self.debug("Found: %s", fn)
373
list(map(self.add, dists))
374
375
def url_ok(self, url, fatal=False):
376
s = URL_SCHEME(url)
377
is_file = s and s.group(1).lower() == 'file'
378
if is_file or self.allows(urllib.parse.urlparse(url)[1]):
379
return True
380
msg = (
381
"\nNote: Bypassing %s (disallowed host; see "
382
"http://bit.ly/2hrImnY for details).\n")
383
if fatal:
384
raise DistutilsError(msg % url)
385
else:
386
self.warn(msg, url)
387
388
def scan_egg_links(self, search_path):
389
dirs = filter(os.path.isdir, search_path)
390
egg_links = (
391
(path, entry)
392
for path in dirs
393
for entry in os.listdir(path)
394
if entry.endswith('.egg-link')
395
)
396
list(itertools.starmap(self.scan_egg_link, egg_links))
397
398
def scan_egg_link(self, path, entry):
399
with open(os.path.join(path, entry)) as raw_lines:
400
# filter non-empty lines
401
lines = list(filter(None, map(str.strip, raw_lines)))
402
403
if len(lines) != 2:
404
# format is not recognized; punt
405
return
406
407
egg_path, setup_path = lines
408
409
for dist in find_distributions(os.path.join(path, egg_path)):
410
dist.location = os.path.join(path, *lines)
411
dist.precedence = SOURCE_DIST
412
self.add(dist)
413
414
def _scan(self, link):
415
# Process a URL to see if it's for a package page
416
NO_MATCH_SENTINEL = None, None
417
if not link.startswith(self.index_url):
418
return NO_MATCH_SENTINEL
419
420
parts = list(map(
421
urllib.parse.unquote, link[len(self.index_url):].split('/')
422
))
423
if len(parts) != 2 or '#' in parts[1]:
424
return NO_MATCH_SENTINEL
425
426
# it's a package page, sanitize and index it
427
pkg = safe_name(parts[0])
428
ver = safe_version(parts[1])
429
self.package_pages.setdefault(pkg.lower(), {})[link] = True
430
return to_filename(pkg), to_filename(ver)
431
432
    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = self._scan(url)  # ensure this page is in the page index
        if not pkg:
            return ""  # no sense double-scanning non-package pages

        # process individual package page
        for new_url in find_external_links(url, page):
            # Process the found URL
            base, frag = egg_info_for_url(new_url)
            # Bare .py links need an explicit "#egg" tag to be identifiable.
            if base.endswith('.py') and not frag:
                if ver:
                    new_url += '#egg=%s-%s' % (pkg, ver)
                else:
                    self.need_version_info(url)
            self.scan_url(new_url)

        # Rewrite legacy "md5" links into "#md5=digest" URL fragments.
        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
        )
460
461
def need_version_info(self, url):
462
self.scan_all(
463
"Page at %s links to .py file(s) without version info; an index "
464
"scan is required.", url
465
)
466
467
def scan_all(self, msg=None, *args):
468
if self.index_url not in self.fetched_urls:
469
if msg:
470
self.warn(msg, *args)
471
self.info(
472
"Scanning index of all packages (this may take a while)"
473
)
474
self.scan_url(self.index_url)
475
476
def find_packages(self, requirement):
477
self.scan_url(self.index_url + requirement.unsafe_name + '/')
478
479
if not self.package_pages.get(requirement.key):
480
# Fall back to safe version of the name
481
self.scan_url(self.index_url + requirement.project_name + '/')
482
483
if not self.package_pages.get(requirement.key):
484
# We couldn't find the target package, so search the index page too
485
self.not_found_in_index(requirement)
486
487
for url in list(self.package_pages.get(requirement.key, ())):
488
# scan each page that might be related to the desired package
489
self.scan_url(url)
490
491
def obtain(self, requirement, installer=None):
492
self.prescan()
493
self.find_packages(requirement)
494
for dist in self[requirement.key]:
495
if dist in requirement:
496
return dist
497
self.debug("%s does not match %s", requirement, dist)
498
return super(PackageIndex, self).obtain(requirement, installer)
499
500
def check_hash(self, checker, filename, tfp):
501
"""
502
checker is a ContentChecker
503
"""
504
checker.report(
505
self.debug,
506
"Validating %%s checksum for %s" % filename)
507
if not checker.is_valid():
508
tfp.close()
509
os.unlink(filename)
510
raise DistutilsError(
511
"%s validation failed for %s; "
512
"possible download problem?"
513
% (checker.hash.name, os.path.basename(filename))
514
)
515
516
def add_find_links(self, urls):
517
"""Add `urls` to the list that will be prescanned for searches"""
518
for url in urls:
519
if (
520
self.to_scan is None # if we have already "gone online"
521
or not URL_SCHEME(url) # or it's a local file/directory
522
or url.startswith('file:')
523
or list(distros_for_url(url)) # or a direct package link
524
):
525
# then go ahead and process it now
526
self.scan_url(url)
527
else:
528
# otherwise, defer retrieval till later
529
self.to_scan.append(url)
530
531
def prescan(self):
532
"""Scan urls scheduled for prescanning (e.g. --find-links)"""
533
if self.to_scan:
534
list(map(self.scan_url, self.to_scan))
535
self.to_scan = None # from now on, go ahead and process immediately
536
537
def not_found_in_index(self, requirement):
538
if self[requirement.key]: # we've seen at least one distro
539
meth, msg = self.info, "Couldn't retrieve index page for %r"
540
else: # no distros seen for this name, might be misspelled
541
meth, msg = (
542
self.warn,
543
"Couldn't find index page for %r (maybe misspelled?)")
544
meth(msg, requirement.unsafe_name)
545
self.scan_all()
546
547
    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object). If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged. If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned. Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    # plain .py file: synthesize a setup.py next to it
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                # treat the string as a requirement spec and fall through
                spec = parse_requirement_arg(spec)
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
580
581
    def fetch_distribution(  # noqa: C901 # is too complex (14) # FIXME
            self, requirement, tmpdir, force_scan=False, source=False,
            develop_ok=False, local_index=None):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}  # develop/system eggs already warned about
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn(
                            "Skipping development or system egg: %s", dist,
                        )
                        skipped[dist] = 1
                    continue

                test = (
                    dist in req
                    and (dist.precedence <= SOURCE_DIST or not source)
                )
                if test:
                    # candidate matches: download and verify it materialized
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    if os.path.exists(dist.download_location):
                        return dist

        # Escalating search: forced scan, then local index, then prescan of
        # find-links, then a full package search as a last resort.
        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=dist.download_location)
656
657
def fetch(self, requirement, tmpdir, force_scan=False, source=False):
658
"""Obtain a file suitable for fulfilling `requirement`
659
660
DEPRECATED; use the ``fetch_distribution()`` method now instead. For
661
backward compatibility, this routine is identical but returns the
662
``location`` of the downloaded distribution instead of a distribution
663
object.
664
"""
665
dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
666
if dist is not None:
667
return dist.location
668
return None
669
670
    def gen_setup(self, filename, fragment, tmpdir):
        """Create a trivial setup.py for a downloaded .py module.

        The ``#egg=name-version`` fragment must yield exactly one
        unambiguous name/version interpretation; otherwise raise.
        """
        match = EGG_FRAGMENT.match(fragment)
        # All versioned interpretations of the fragment (empty when no match).
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists) == 1:  # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
                    shutil.copy2(filename, dst)
                    filename = dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
                file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name, dists[0].version,
                        os.path.splitext(basename)[0]
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )
709
710
    # Read/write chunk size (bytes) used when streaming downloads.
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        """Stream *url* into *filename*, validating any URL hash fragment.

        Returns the response headers; raises DistutilsError on HTTP errors
        or checksum mismatch.
        """
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            # open_url returns (not raises) HTTPError objects; reject them.
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                self.check_hash(checker, filename, tfp)
            return headers
        finally:
            if fp:
                fp.close()
747
748
def reporthook(self, url, filename, blocknum, blksize, size):
749
pass # no-op
750
751
    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901 # is too complex (12)
        """Open *url*, mapping transport failures to warnings or errors.

        With *warning* set, failures are logged (returning None implicitly);
        otherwise they are re-raised as DistutilsError. HTTP error responses
        are returned as the HTTPError object itself, not raised.
        """
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg)) from v
        except urllib.error.HTTPError as v:
            # hand the error response back to the caller for inspection
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v.reason)) from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' %
                    (url, v.line)
                ) from v
        except (http.client.HTTPException, socket.error) as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v)) from v
786
787
    def _download_url(self, scheme, url, tmpdir):
        """Download *url* into *tmpdir*, dispatching on its scheme."""
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            # neutralize ".." and backslashes so the name stays inside tmpdir
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"  # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]  # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme == 'file':
            return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
        else:
            self.url_ok(url, True)  # raises error if not allowed
            return self._attempt_download(url, filename)
815
816
def scan_url(self, url):
817
self.process_url(url, True)
818
819
def _attempt_download(self, url, filename):
820
headers = self._download_to(url, filename)
821
if 'html' in headers.get('content-type', '').lower():
822
return self._download_html(url, headers, filename)
823
else:
824
return filename
825
826
    def _download_html(self, url, headers, filename):
        """Handle an unexpected HTML download result.

        A Subversion index page triggers an svn checkout; anything else
        is an error (the file is removed either way).
        """
        file = open(filename)
        # Inspect only the first non-blank line.
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break  # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at " + url)
840
841
    def _download_svn(self, url, filename):
        """Check out an svn URL into *filename* (deprecated).

        NOTE(review): the URL is interpolated into an os.system() shell
        command without quoting -- a shell-injection risk for untrusted
        URLs; flagged, behavior preserved.
        """
        warnings.warn("SVN download support is deprecated", UserWarning)
        url = url.split('#', 1)[0]  # remove any fragment for svn's sake
        creds = ''
        # Extract embedded user:password credentials from svn: URLs and
        # pass them via --username/--password options instead.
        if url.lower().startswith('svn:') and '@' in url:
            scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
            if not netloc and path.startswith('//') and '/' in path[2:]:
                netloc, path = path[2:].split('/', 1)
                auth, host = _splituser(netloc)
                if auth:
                    if ':' in auth:
                        user, pw = auth.split(':', 1)
                        creds = " --username=%s --password=%s" % (user, pw)
                    else:
                        creds = " --username=" + auth
                    netloc = host
                    parts = scheme, netloc, url, p, q, f
                    url = urllib.parse.urlunparse(parts)
        self.info("Doing subversion checkout from %s to %s", url, filename)
        os.system("svn checkout%s -q %s %s" % (creds, url, filename))
        return filename
862
863
@staticmethod
864
def _vcs_split_rev_from_url(url, pop_prefix=False):
865
scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)
866
867
scheme = scheme.split('+', 1)[-1]
868
869
# Some fragment identification fails
870
path = path.split('#', 1)[0]
871
872
rev = None
873
if '@' in path:
874
path, rev = path.rsplit('@', 1)
875
876
# Also, discard fragment
877
url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))
878
879
return url, rev
880
881
    def _download_git(self, url, filename):
        """Clone a git URL into *filename*, checking out any "@rev" suffix.

        NOTE(review): url/rev are interpolated into os.system() shell
        commands without quoting -- shell-injection risk for untrusted
        URLs; flagged, behavior preserved.
        """
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        os.system("git clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Checking out %s", rev)
            os.system("git -C %s checkout --quiet %s" % (
                filename,
                rev,
            ))

        return filename
896
897
    def _download_hg(self, url, filename):
        """Clone a mercurial URL into *filename*, updating to any "@rev".

        NOTE(review): url/rev are interpolated into os.system() shell
        commands without quoting -- shell-injection risk for untrusted
        URLs; flagged, behavior preserved.
        """
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        os.system("hg clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Updating to %s", rev)
            os.system("hg --cwd %s up -C -r %s -q" % (
                filename,
                rev,
            ))

        return filename
912
913
    # Thin wrappers over distutils logging; subclasses may override to
    # redirect output.
    def debug(self, msg, *args):
        log.debug(msg, *args)

    def info(self, msg, *args):
        log.info(msg, *args)

    def warn(self, msg, *args):
        log.warn(msg, *args)
921
922
923
# This pattern matches a character entity reference (a decimal numeric
924
# references, a hexadecimal numeric reference, or a named reference).
925
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
926
927
928
def decode_entity(match):
    """Unescape the HTML entity reference captured by *match*."""
    return html.unescape(match.group(0))
931
932
933
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    # Replace each entity reference via decode_entity (html.unescape).
    return entity_sub(decode_entity, text)
943
944
945
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable with the default socket
    timeout set to *timeout* seconds, restoring the previous default
    afterwards (even on error).
    """
    def _socket_timeout(func):
        # Fix: preserve the wrapped function's metadata (__name__, __doc__),
        # which the previous wrapper discarded; `wraps` is already imported
        # at the top of this module.
        @wraps(func)
        def wrapper(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old_timeout)

        return wrapper

    return _socket_timeout
958
959
960
def _encode_auth(auth):
    """
    Encode auth from a URL suitable for an HTTP header.
    >>> str(_encode_auth('username%3Apassword'))
    'dXNlcm5hbWU6cGFzc3dvcmQ='

    Long auth strings should not cause a newline to be inserted.
    >>> long_auth = 'username:' + 'password'*10
    >>> chr(10) in str(_encode_auth(long_auth))
    False
    """
    unquoted = urllib.parse.unquote(auth)
    encoded = base64.b64encode(unquoted.encode()).decode()
    # guard against any newline the base64 layer might have inserted
    return encoded.replace('\n', '')
979
980
981
class Credential:
    """A username/password pair. Use like a namedtuple."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        return iter((self.username, self.password))

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)
996
997
998
class PyPIConfig(configparser.RawConfigParser):
    """Read repository credentials from the user's ``.pypirc`` file."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        # Map each configured repository URL to its Credential.
        sections_with_repositories = [
            section for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))

    def _get_repo_cred(self, section):
        # Return (repository url, Credential) for a config section.
        repo = self.get(section, 'repository').strip()
        return repo, Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
1034
1035
1036
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication.

    Credentials come from the URL's userinfo component or, failing that,
    from a matching repository entry in ~/.pypirc. Returns the file-like
    response from *opener*; on success its ``url`` attribute is rewritten
    to re-embed the credentials so links found on the page keep working.
    """

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, address = _splituser(netloc)
    else:
        # Bugfix: ``address`` was previously left unbound on this path,
        # so a .pypirc credential matching a non-http(s) URL raised
        # NameError at the urlunparse below.
        auth, address = None, netloc

    if not auth:
        # No credentials in the URL itself; fall back to ~/.pypirc.
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        # Strip the userinfo from the URL actually requested; the
        # credentials travel in the Authorization header instead.
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
1080
1081
1082
# copy of urllib.parse._splituser from Python 3.8
1083
def _splituser(host):
1084
"""splituser('user[:passwd]@host[:port]')
1085
--> 'user[:passwd]', 'host[:port]'."""
1086
user, delim, host = host.rpartition('@')
1087
return (user if delim else None), host
1088
1089
1090
# adding a timeout to avoid freezing package_index
1091
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1092
1093
1094
def fix_sf_url(url):
    """Return *url* unchanged; kept only for backward compatibility."""
    return url
1096
1097
1098
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        # Plain file: let urllib serve it directly.
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                # An existing index.html wins outright: serve its
                # contents and skip the generated listing (the ``break``
                # bypasses the for/else branch below).
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                # Mark subdirectories with a trailing slash in the link.
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            # Loop finished without finding index.html: synthesize a
            # minimal HTML listing of the directory entries.
            tmpl = (
                "<html><head><title>{url}</title>"
                "</head><body>{files}</body></html>")
            body = tmpl.format(url=url, files='\n'.join(files))
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    # NOTE: HTTPError is used here as a file-like response object (even
    # for status 200), not raised — callers read it like an open page.
    return urllib.error.HTTPError(url, status, message, headers, body_stream)
1127
1128