Path: blob/main/test/lib/python3.9/site-packages/setuptools/package_index.py
"""PyPI and direct package downloading"""1import sys2import os3import re4import io5import shutil6import socket7import base648import hashlib9import itertools10import warnings11import configparser12import html13import http.client14import urllib.parse15import urllib.request16import urllib.error17from functools import wraps1819import setuptools20from pkg_resources import (21CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,22Environment, find_distributions, safe_name, safe_version,23to_filename, Requirement, DEVELOP_DIST, EGG_DIST, parse_version,24)25from distutils import log26from distutils.errors import DistutilsError27from fnmatch import translate28from setuptools.wheel import Wheel29from setuptools.extern.more_itertools import unique_everseen303132EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')33HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)34PYPI_MD5 = re.compile(35r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'36r'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\)'37)38URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match39EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()4041__all__ = [42'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',43'interpret_distro_name',44]4546_SOCKET_TIMEOUT = 154748_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"49user_agent = _tmpl.format(50py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)515253def parse_requirement_arg(spec):54try:55return Requirement.parse(spec)56except ValueError as e:57raise DistutilsError(58"Not a URL, existing file, or requirement spec: %r" % (spec,)59) from e606162def parse_bdist_wininst(name):63"""Return (base,pyversion) or (None,None) for possible .exe name"""6465lower = name.lower()66base, py_ver, plat = None, None, None6768if lower.endswith('.exe'):69if lower.endswith('.win32.exe'):70base = name[:-10]71plat = 'win32'72elif lower.startswith('.win32-py', -16):73py_ver = name[-7:-4]74base = name[:-16]75plat = 'win32'76elif lower.endswith('.win-amd64.exe'):77base = name[:-14]78plat = 'win-amd64'79elif lower.startswith('.win-amd64-py', -20):80py_ver = name[-7:-4]81base = name[:-20]82plat = 'win-amd64'83return base, py_ver, plat848586def egg_info_for_url(url):87parts = urllib.parse.urlparse(url)88scheme, server, path, parameters, query, fragment = parts89base = urllib.parse.unquote(path.split('/')[-1])90if server == 'sourceforge.net' and base == 'download': # XXX Yuck91base = urllib.parse.unquote(path.split('/')[-2])92if '#' in base:93base, fragment = base.split('#', 1)94return base, fragment959697def distros_for_url(url, metadata=None):98"""Yield egg or source distribution objects that might be found at a URL"""99base, fragment = egg_info_for_url(url)100for dist in distros_for_location(url, base, metadata):101yield dist102if fragment:103match = EGG_FRAGMENT.match(fragment)104if match:105for dist in interpret_distro_name(106url, match.group(1), metadata, precedence=CHECKOUT_DIST107):108yield dist109110111def distros_for_location(location, basename, metadata=None):112"""Yield egg or source distribution objects based on basename"""113if basename.endswith('.egg.zip'):114basename = basename[:-4] # strip the .zip115if basename.endswith('.egg') and '-' in basename:116# only one, unambiguous interpretation117return [Distribution.from_location(location, basename, metadata)]118if basename.endswith('.whl') and '-' in basename:119wheel = Wheel(basename)120if not wheel.is_compatible():121return []122return 


def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    return distros_for_location(
        normalize_path(filename), os.path.basename(filename), metadata
    )


def interpret_distro_name(
        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
        platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Generate alternative interpretations of a source distro name
    # Because some packages are ambiguous as to name/versions split
    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
    # the spurious interpretations should be ignored, because in the event
    # there's also an "adns" package, the spurious "python-1.1.0" version will
    # compare lower than any numeric version number, and is therefore unlikely
    # to match a request for it. It's still a potential problem, though, and
    # in the long run PyPI and the distutils should go for "safe" names and
    # versions in distribution archive names (sdist and bdist).

    parts = basename.split('-')
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    for p in range(1, len(parts) + 1):
        yield Distribution(
            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
            py_version=py_version, precedence=precedence,
            platform=platform
        )


def unique_values(func):
    """
    Wrap a function returning an iterable such that the resulting iterable
    only ever yields unique items.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return wrapper


REL = re.compile(r"""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting


@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))

    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))


class ContentChecker:
    """
    A null content checker that defines the interface for checking content
    """

    def feed(self, block):
        """
        Feed a block of data to the hash.
        """
        return

    def is_valid(self):
        """
        Check the hash. Return False if validation fails.
        """
        return True

    def report(self, reporter, template):
        """
        Call reporter with information about the checker (hash name)
        substituted into the template.
        """
        return


class HashChecker(ContentChecker):
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        "Construct a (possibly null) ContentChecker from a URL"
        fragment = urllib.parse.urlparse(url)[-1]
        if not fragment:
            return ContentChecker()
        match = cls.pattern.search(fragment)
        if not match:
            return ContentChecker()
        return cls(**match.groupdict())

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        msg = template % self.hash_name
        return reporter(msg)


class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(
            self, index_url="https://pypi.org/simple/", hosts=('*',),
            ca_bundle=None, verify_ssl=True, *args, **kw
    ):
        super().__init__(*args, **kw)
        self.index_url = index_url + "/" [:not index_url.endswith('/')]
        self.scanned_urls = {}
        self.fetched_urls = {}
        self.package_pages = {}
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []
        self.opener = urllib.request.urlopen

    def add(self, dist):
        # ignore invalid versions
        try:
            parse_version(dist.version)
        except Exception:
            return
        return super().add(dist)

    # FIXME: 'PackageIndex.process_url' is too complex (14)
    def process_url(self, url, retrieve=False):  # noqa: C901
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
        f = self.open_url(url, tmpl % url)
        if f is None:
            return
        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
            self.info("Authentication error: %s" % f.msg)
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # In Python 3 and got bytes but want str.
            if isinstance(f, urllib.error.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        for match in HREF.finditer(page):
            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)

    def process_filename(self, fn, nested=False):
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            list(map(self.add, dists))

    def url_ok(self, url, fatal=False):
        s = URL_SCHEME(url)
        is_file = s and s.group(1).lower() == 'file'
        if is_file or self.allows(urllib.parse.urlparse(url)[1]):
            return True
        msg = (
            "\nNote: Bypassing %s (disallowed host; see "
            "http://bit.ly/2hrImnY for details).\n")
        if fatal:
            raise DistutilsError(msg % url)
        else:
            self.warn(msg, url)

    def scan_egg_links(self, search_path):
        dirs = filter(os.path.isdir, search_path)
        egg_links = (
            (path, entry)
            for path in dirs
            for entry in os.listdir(path)
            if entry.endswith('.egg-link')
        )
        list(itertools.starmap(self.scan_egg_link, egg_links))

    def scan_egg_link(self, path, entry):
        with open(os.path.join(path, entry)) as raw_lines:
            # filter non-empty lines
            lines = list(filter(None, map(str.strip, raw_lines)))

        if len(lines) != 2:
            # format is not recognized; punt
            return

        egg_path, setup_path = lines

        for dist in find_distributions(os.path.join(path, egg_path)):
            dist.location = os.path.join(path, *lines)
            dist.precedence = SOURCE_DIST
            self.add(dist)

    def _scan(self, link):
        # Process a URL to see if it's for a package page
        NO_MATCH_SENTINEL = None, None
        if not link.startswith(self.index_url):
            return NO_MATCH_SENTINEL

        parts = list(map(
            urllib.parse.unquote, link[len(self.index_url):].split('/')
        ))
        if len(parts) != 2 or '#' in parts[1]:
            return NO_MATCH_SENTINEL

        # it's a package page, sanitize and index it
        pkg = safe_name(parts[0])
        ver = safe_version(parts[1])
        self.package_pages.setdefault(pkg.lower(), {})[link] = True
        return to_filename(pkg), to_filename(ver)

    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = self._scan(url)  # ensure this page is in the page index
        if not pkg:
            return ""  # no sense double-scanning non-package pages

        # process individual package page
        for new_url in find_external_links(url, page):
            # Process the found URL
            base, frag = egg_info_for_url(new_url)
            if base.endswith('.py') and not frag:
                if ver:
                    new_url += '#egg=%s-%s' % (pkg, ver)
                else:
                    self.need_version_info(url)
            self.scan_url(new_url)

        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
        )

    def need_version_info(self, url):
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.", url
        )

    def scan_all(self, msg=None, *args):
        if self.index_url not in self.fetched_urls:
            if msg:
                self.warn(msg, *args)
            self.info(
                "Scanning index of all packages (this may take a while)"
            )
        self.scan_url(self.index_url)

    def find_packages(self, requirement):
        self.scan_url(self.index_url + requirement.unsafe_name + '/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name + '/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.not_found_in_index(requirement)

        for url in list(self.package_pages.get(requirement.key, ())):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement, installer)

    def check_hash(self, checker, filename, tfp):
        """
        checker is a ContentChecker
        """
        checker.report(
            self.debug,
            "Validating %%s checksum for %s" % filename)
        if not checker.is_valid():
            tfp.close()
            os.unlink(filename)
            raise DistutilsError(
                "%s validation failed for %s; "
                "possible download problem?"
                % (checker.hash.name, os.path.basename(filename))
            )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None  # if we have already "gone online"
                or not URL_SCHEME(url)  # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))  # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            list(map(self.scan_url, self.to_scan))
        self.to_scan = None  # from now on, go ahead and process immediately

    def not_found_in_index(self, requirement):
        if self[requirement.key]:  # we've seen at least one distro
            meth, msg = self.info, "Couldn't retrieve index page for %r"
        else:  # no distros seen for this name, might be misspelled
            meth, msg = (
                self.warn,
                "Couldn't find index page for %r (maybe misspelled?)")
        meth(msg, requirement.unsafe_name)
        self.scan_all()

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object). If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged. If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned. Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                spec = parse_requirement_arg(spec)
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)

    def fetch_distribution(  # noqa: C901  # is too complex (14)  # FIXME
            self, requirement, tmpdir, force_scan=False, source=False,
            develop_ok=False, local_index=None):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn(
                            "Skipping development or system egg: %s", dist,
                        )
                        skipped[dist] = 1
                    continue

                test = (
                    dist in req
                    and (dist.precedence <= SOURCE_DIST or not source)
                )
                if test:
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    if os.path.exists(dist.download_location):
                        return dist

        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=dist.download_location)
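
    # Illustrative sketch (editorial addition, not part of the upstream
    # module) of how a tool typically drives the method above; the
    # requirement string and directory are made-up examples.
    #
    #     index = PackageIndex()
    #     req = Requirement.parse('example-pkg>=1.0')
    #     dist = index.fetch_distribution(req, '/tmp/downloads', source=True)
    #     if dist is not None:
    #         print(dist.project_name, dist.version, dist.location)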

    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead. For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        match = EGG_FRAGMENT.match(fragment)
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists) == 1:  # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
                    shutil.copy2(filename, dst)
                    filename = dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
                file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name, dists[0].version,
                        os.path.splitext(basename)[0]
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    dl_blocksize = 8192

    def _download_to(self, url, filename):
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                self.check_hash(checker, filename, tfp)
            return headers
        finally:
            if fp:
                fp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        pass  # no-op
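
    # Illustrative sketch (editorial addition): reporthook() is deliberately a
    # no-op so that subclasses can surface download progress; _download_to()
    # calls it with the arguments shown above.  A hypothetical subclass:
    #
    #     class VerboseIndex(PackageIndex):
    #         def reporthook(self, url, filename, blocknum, blksize, size):
    #             print("%s: block %d (%d bytes each, %d total)"
    #                   % (filename, blocknum, blksize, size))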

    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg)) from v
        except urllib.error.HTTPError as v:
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v.reason)) from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' %
                    (url, v.line)
                ) from v
        except (http.client.HTTPException, socket.error) as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v)) from v

    def _download_url(self, scheme, url, tmpdir):
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"  # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]  # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme == 'file':
            return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
        else:
            self.url_ok(url, True)  # raises error if not allowed
            return self._attempt_download(url, filename)

    def scan_url(self, url):
        self.process_url(url, True)

    def _attempt_download(self, url, filename):
        headers = self._download_to(url, filename)
        if 'html' in headers.get('content-type', '').lower():
            return self._download_html(url, headers, filename)
        else:
            return filename

    def _download_html(self, url, headers, filename):
        file = open(filename)
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break  # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at " + url)

    def _download_svn(self, url, filename):
        warnings.warn("SVN download support is deprecated", UserWarning)
        url = url.split('#', 1)[0]  # remove any fragment for svn's sake
        creds = ''
        if url.lower().startswith('svn:') and '@' in url:
            scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
            if not netloc and path.startswith('//') and '/' in path[2:]:
                netloc, path = path[2:].split('/', 1)
                auth, host = _splituser(netloc)
                if auth:
                    if ':' in auth:
                        user, pw = auth.split(':', 1)
                        creds = " --username=%s --password=%s" % (user, pw)
                    else:
                        creds = " --username=" + auth
                    netloc = host
                    parts = scheme, netloc, url, p, q, f
                    url = urllib.parse.urlunparse(parts)
        self.info("Doing subversion checkout from %s to %s", url, filename)
        os.system("svn checkout%s -q %s %s" % (creds, url, filename))
        return filename

    @staticmethod
    def _vcs_split_rev_from_url(url, pop_prefix=False):
        scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)

        scheme = scheme.split('+', 1)[-1]

        # Some fragment identification fails
        path = path.split('#', 1)[0]

        rev = None
        if '@' in path:
            path, rev = path.rsplit('@', 1)

        # Also, discard fragment
        url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))

        return url, rev
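
    # Illustrative example (editorial addition; the URL is made up): the
    # helper above strips the VCS prefix from the scheme and splits a pinned
    # revision off the path, e.g.
    #
    #     >>> PackageIndex._vcs_split_rev_from_url(
    #     ...     'git+https://example.com/proj.git@v1.2#egg=proj')
    #     ('https://example.com/proj.git', 'v1.2')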

    def _download_git(self, url, filename):
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        os.system("git clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Checking out %s", rev)
            os.system("git -C %s checkout --quiet %s" % (
                filename,
                rev,
            ))

        return filename

    def _download_hg(self, url, filename):
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        os.system("hg clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Updating to %s", rev)
            os.system("hg --cwd %s up -C -r %s -q" % (
                filename,
                rev,
            ))

        return filename

    def debug(self, msg, *args):
        log.debug(msg, *args)

    def info(self, msg, *args):
        log.info(msg, *args)

    def warn(self, msg, *args):
        log.warn(msg, *args)


# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference).
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub


def decode_entity(match):
    what = match.group(0)
    return html.unescape(what)


def htmldecode(text):
    """
    Decode HTML entities in the given text.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    return entity_sub(decode_entity, text)


def socket_timeout(timeout=15):
    def _socket_timeout(func):
        def _socket_timeout(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old_timeout)

        return _socket_timeout

    return _socket_timeout


def _encode_auth(auth):
    """
    Encode auth from a URL suitable for an HTTP header.
    >>> str(_encode_auth('username%3Apassword'))
    'dXNlcm5hbWU6cGFzc3dvcmQ='

    Long auth strings should not cause a newline to be inserted.
    >>> long_auth = 'username:' + 'password' * 10
    >>> chr(10) in str(_encode_auth(long_auth))
    False
    """
    auth_s = urllib.parse.unquote(auth)
    # convert to bytes
    auth_bytes = auth_s.encode()
    encoded_bytes = base64.b64encode(auth_bytes)
    # convert back to a string
    encoded = encoded_bytes.decode()
    # strip the trailing carriage return
    return encoded.replace('\n', '')


class Credential:
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        yield self.username
        yield self.password

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)


class PyPIConfig(configparser.RawConfigParser):
    def __init__(self):
        """
        Load from ~/.pypirc
        """
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        sections_with_repositories = [
            section for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))

    def _get_repo_cred(self, section):
        repo = self.get(section, 'repository').strip()
        return repo, Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred


def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication"""

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, address = _splituser(netloc)
    else:
        auth = None

    if not auth:
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp


# copy of urllib.parse._splituser from Python 3.8
def _splituser(host):
    """splituser('user[:passwd]@host[:port]')
    --> 'user[:passwd]', 'host[:port]'."""
    user, delim, host = host.rpartition('@')
    return (user if delim else None), host


# adding a timeout to avoid freezing package_index
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)


def fix_sf_url(url):
    return url  # backward compatibility
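
# Illustrative note (editorial addition): open_with_auth() falls back to
# ~/.pypirc credentials when the URL itself carries none.  A minimal section
# that PyPIConfig picks up looks like this (all values are placeholders):
#
#     [myrepo]
#     repository = https://pypi.example.com/simple/
#     username = alice
#     password = secret
#
# Any URL starting with that repository value is then requested with an HTTP
# Basic Authorization header built from "alice:secret".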


def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            tmpl = (
                "<html><head><title>{url}</title>"
                "</head><body>{files}</body></html>")
            body = tmpl.format(url=url, files='\n'.join(files))
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    return urllib.error.HTTPError(url, status, message, headers, body_stream)
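

# Illustrative note (editorial addition; the URL is made up): download URLs
# handled by this module may carry a hash fragment, which HashChecker.from_url()
# recognizes and check_hash() then enforces once the transfer completes, e.g.
#
#     https://example.com/dl/foo-1.0.tar.gz#sha256=<hex digest>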