Path: blob/master/ invest-robot-contest_TinkoffBotTwitch-main/venv/lib/python3.8/site-packages/setuptools/package_index.py
7763 views
"""PyPI and direct package downloading"""1import sys2import os3import re4import shutil5import socket6import base647import hashlib8import itertools9import warnings10from functools import wraps1112from setuptools.extern import six13from setuptools.extern.six.moves import urllib, http_client, configparser, map1415import setuptools16from pkg_resources import (17CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,18Environment, find_distributions, safe_name, safe_version,19to_filename, Requirement, DEVELOP_DIST, EGG_DIST,20)21from setuptools import ssl_support22from distutils import log23from distutils.errors import DistutilsError24from fnmatch import translate25from setuptools.py27compat import get_all_headers26from setuptools.py33compat import unescape27from setuptools.wheel import Wheel2829__metaclass__ = type3031EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')32HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)33PYPI_MD5 = re.compile(34r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'35r'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\)'36)37URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match38EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()3940__all__ = [41'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',42'interpret_distro_name',43]4445_SOCKET_TIMEOUT = 154647_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"48user_agent = _tmpl.format(49py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)505152def parse_requirement_arg(spec):53try:54return Requirement.parse(spec)55except ValueError as e:56raise DistutilsError(57"Not a URL, existing file, or requirement spec: %r" % (spec,)58) from e596061def parse_bdist_wininst(name):62"""Return (base,pyversion) or (None,None) for possible .exe name"""6364lower = name.lower()65base, py_ver, plat = None, None, None6667if lower.endswith('.exe'):68if lower.endswith('.win32.exe'):69base = name[:-10]70plat = 
'win32'71elif lower.startswith('.win32-py', -16):72py_ver = name[-7:-4]73base = name[:-16]74plat = 'win32'75elif lower.endswith('.win-amd64.exe'):76base = name[:-14]77plat = 'win-amd64'78elif lower.startswith('.win-amd64-py', -20):79py_ver = name[-7:-4]80base = name[:-20]81plat = 'win-amd64'82return base, py_ver, plat838485def egg_info_for_url(url):86parts = urllib.parse.urlparse(url)87scheme, server, path, parameters, query, fragment = parts88base = urllib.parse.unquote(path.split('/')[-1])89if server == 'sourceforge.net' and base == 'download': # XXX Yuck90base = urllib.parse.unquote(path.split('/')[-2])91if '#' in base:92base, fragment = base.split('#', 1)93return base, fragment949596def distros_for_url(url, metadata=None):97"""Yield egg or source distribution objects that might be found at a URL"""98base, fragment = egg_info_for_url(url)99for dist in distros_for_location(url, base, metadata):100yield dist101if fragment:102match = EGG_FRAGMENT.match(fragment)103if match:104for dist in interpret_distro_name(105url, match.group(1), metadata, precedence=CHECKOUT_DIST106):107yield dist108109110def distros_for_location(location, basename, metadata=None):111"""Yield egg or source distribution objects based on basename"""112if basename.endswith('.egg.zip'):113basename = basename[:-4] # strip the .zip114if basename.endswith('.egg') and '-' in basename:115# only one, unambiguous interpretation116return [Distribution.from_location(location, basename, metadata)]117if basename.endswith('.whl') and '-' in basename:118wheel = Wheel(basename)119if not wheel.is_compatible():120return []121return [Distribution(122location=location,123project_name=wheel.project_name,124version=wheel.version,125# Increase priority over eggs.126precedence=EGG_DIST + 1,127)]128if basename.endswith('.exe'):129win_base, py_ver, platform = parse_bdist_wininst(basename)130if win_base is not None:131return interpret_distro_name(132location, win_base, metadata, py_ver, BINARY_DIST, platform133)134# Try 
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    return distros_for_location(
        normalize_path(filename), os.path.basename(filename), metadata
    )


def interpret_distro_name(
        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
        platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Because some packages are ambiguous as to name/versions split
    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc., we generate
    # each possible interpretation (e.g. "adns, python-1.1.0",
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In
    # practice, the spurious interpretations should be ignored, because in
    # the event there's also an "adns" package, the spurious "python-1.1.0"
    # version will compare lower than any numeric version number, and is
    # therefore unlikely to match a request for it. It's still a potential
    # problem, though, and in the long run PyPI and the distutils should go
    # for "safe" names and versions in distribution archive names (sdist
    # and bdist).

    parts = basename.split('-')
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    for p in range(1, len(parts) + 1):
        yield Distribution(
            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
            py_version=py_version, precedence=precedence,
            platform=platform
        )


# From Python 2.7 docs
def unique_everseen(iterable, key=None):
    "List unique elements, preserving order. Remember all elements ever seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen = set()
    seen_add = seen.add
    if key is None:
        for element in six.moves.filterfalse(seen.__contains__, iterable):
            seen_add(element)
            yield element
    else:
        for element in iterable:
            k = key(element)
            if k not in seen:
                seen_add(k)
                yield element


def unique_values(func):
    """
    Wrap a function returning an iterable such that the resulting iterable
    only ever yields unique items.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return wrapper


REL = re.compile(r"""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting


@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))

    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
HREF.search(page, pos)236if match:237yield urllib.parse.urljoin(url, htmldecode(match.group(1)))238239240class ContentChecker:241"""242A null content checker that defines the interface for checking content243"""244245def feed(self, block):246"""247Feed a block of data to the hash.248"""249return250251def is_valid(self):252"""253Check the hash. Return False if validation fails.254"""255return True256257def report(self, reporter, template):258"""259Call reporter with information about the checker (hash name)260substituted into the template.261"""262return263264265class HashChecker(ContentChecker):266pattern = re.compile(267r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='268r'(?P<expected>[a-f0-9]+)'269)270271def __init__(self, hash_name, expected):272self.hash_name = hash_name273self.hash = hashlib.new(hash_name)274self.expected = expected275276@classmethod277def from_url(cls, url):278"Construct a (possibly null) ContentChecker from a URL"279fragment = urllib.parse.urlparse(url)[-1]280if not fragment:281return ContentChecker()282match = cls.pattern.search(fragment)283if not match:284return ContentChecker()285return cls(**match.groupdict())286287def feed(self, block):288self.hash.update(block)289290def is_valid(self):291return self.hash.hexdigest() == self.expected292293def report(self, reporter, template):294msg = template % self.hash_name295return reporter(msg)296297298class PackageIndex(Environment):299"""A distribution index that scans web pages for download URLs"""300301def __init__(302self, index_url="https://pypi.org/simple/", hosts=('*',),303ca_bundle=None, verify_ssl=True, *args, **kw304):305Environment.__init__(self, *args, **kw)306self.index_url = index_url + "/" [:not index_url.endswith('/')]307self.scanned_urls = {}308self.fetched_urls = {}309self.package_pages = {}310self.allows = re.compile('|'.join(map(translate, hosts))).match311self.to_scan = []312use_ssl = (313verify_ssl314and ssl_support.is_available315and (ca_bundle or 
ssl_support.find_ca_bundle())316)317if use_ssl:318self.opener = ssl_support.opener_for(ca_bundle)319else:320self.opener = urllib.request.urlopen321322def process_url(self, url, retrieve=False):323"""Evaluate a URL as a possible download, and maybe retrieve it"""324if url in self.scanned_urls and not retrieve:325return326self.scanned_urls[url] = True327if not URL_SCHEME(url):328self.process_filename(url)329return330else:331dists = list(distros_for_url(url))332if dists:333if not self.url_ok(url):334return335self.debug("Found link: %s", url)336337if dists or not retrieve or url in self.fetched_urls:338list(map(self.add, dists))339return # don't need the actual page340341if not self.url_ok(url):342self.fetched_urls[url] = True343return344345self.info("Reading %s", url)346self.fetched_urls[url] = True # prevent multiple fetch attempts347tmpl = "Download error on %s: %%s -- Some packages may not be found!"348f = self.open_url(url, tmpl % url)349if f is None:350return351if isinstance(f, urllib.error.HTTPError) and f.code == 401:352self.info("Authentication error: %s" % f.msg)353self.fetched_urls[f.url] = True354if 'html' not in f.headers.get('content-type', '').lower():355f.close() # not html, we can't process it356return357358base = f.url # handle redirects359page = f.read()360if not isinstance(page, str):361# In Python 3 and got bytes but want str.362if isinstance(f, urllib.error.HTTPError):363# Errors have no charset, assume latin1:364charset = 'latin-1'365else:366charset = f.headers.get_param('charset') or 'latin-1'367page = page.decode(charset, "ignore")368f.close()369for match in HREF.finditer(page):370link = urllib.parse.urljoin(base, htmldecode(match.group(1)))371self.process_url(link)372if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:373page = self.process_index(url, page)374375def process_filename(self, fn, nested=False):376# process filenames or directories377if not os.path.exists(fn):378self.warn("Not found: %s", fn)379return380381if 
os.path.isdir(fn) and not nested:382path = os.path.realpath(fn)383for item in os.listdir(path):384self.process_filename(os.path.join(path, item), True)385386dists = distros_for_filename(fn)387if dists:388self.debug("Found: %s", fn)389list(map(self.add, dists))390391def url_ok(self, url, fatal=False):392s = URL_SCHEME(url)393is_file = s and s.group(1).lower() == 'file'394if is_file or self.allows(urllib.parse.urlparse(url)[1]):395return True396msg = (397"\nNote: Bypassing %s (disallowed host; see "398"http://bit.ly/2hrImnY for details).\n")399if fatal:400raise DistutilsError(msg % url)401else:402self.warn(msg, url)403404def scan_egg_links(self, search_path):405dirs = filter(os.path.isdir, search_path)406egg_links = (407(path, entry)408for path in dirs409for entry in os.listdir(path)410if entry.endswith('.egg-link')411)412list(itertools.starmap(self.scan_egg_link, egg_links))413414def scan_egg_link(self, path, entry):415with open(os.path.join(path, entry)) as raw_lines:416# filter non-empty lines417lines = list(filter(None, map(str.strip, raw_lines)))418419if len(lines) != 2:420# format is not recognized; punt421return422423egg_path, setup_path = lines424425for dist in find_distributions(os.path.join(path, egg_path)):426dist.location = os.path.join(path, *lines)427dist.precedence = SOURCE_DIST428self.add(dist)429430def process_index(self, url, page):431"""Process the contents of a PyPI page"""432433def scan(link):434# Process a URL to see if it's for a package page435if link.startswith(self.index_url):436parts = list(map(437urllib.parse.unquote, link[len(self.index_url):].split('/')438))439if len(parts) == 2 and '#' not in parts[1]:440# it's a package page, sanitize and index it441pkg = safe_name(parts[0])442ver = safe_version(parts[1])443self.package_pages.setdefault(pkg.lower(), {})[link] = True444return to_filename(pkg), to_filename(ver)445return None, None446447# process an index page into the package-page index448for match in 
HREF.finditer(page):449try:450scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))451except ValueError:452pass453454pkg, ver = scan(url) # ensure this page is in the page index455if pkg:456# process individual package page457for new_url in find_external_links(url, page):458# Process the found URL459base, frag = egg_info_for_url(new_url)460if base.endswith('.py') and not frag:461if ver:462new_url += '#egg=%s-%s' % (pkg, ver)463else:464self.need_version_info(url)465self.scan_url(new_url)466467return PYPI_MD5.sub(468lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page469)470else:471return "" # no sense double-scanning non-package pages472473def need_version_info(self, url):474self.scan_all(475"Page at %s links to .py file(s) without version info; an index "476"scan is required.", url477)478479def scan_all(self, msg=None, *args):480if self.index_url not in self.fetched_urls:481if msg:482self.warn(msg, *args)483self.info(484"Scanning index of all packages (this may take a while)"485)486self.scan_url(self.index_url)487488def find_packages(self, requirement):489self.scan_url(self.index_url + requirement.unsafe_name + '/')490491if not self.package_pages.get(requirement.key):492# Fall back to safe version of the name493self.scan_url(self.index_url + requirement.project_name + '/')494495if not self.package_pages.get(requirement.key):496# We couldn't find the target package, so search the index page too497self.not_found_in_index(requirement)498499for url in list(self.package_pages.get(requirement.key, ())):500# scan each page that might be related to the desired package501self.scan_url(url)502503def obtain(self, requirement, installer=None):504self.prescan()505self.find_packages(requirement)506for dist in self[requirement.key]:507if dist in requirement:508return dist509self.debug("%s does not match %s", requirement, dist)510return super(PackageIndex, self).obtain(requirement, installer)511512def check_hash(self, checker, filename, tfp):513"""514checker is a 
ContentChecker515"""516checker.report(517self.debug,518"Validating %%s checksum for %s" % filename)519if not checker.is_valid():520tfp.close()521os.unlink(filename)522raise DistutilsError(523"%s validation failed for %s; "524"possible download problem?"525% (checker.hash.name, os.path.basename(filename))526)527528def add_find_links(self, urls):529"""Add `urls` to the list that will be prescanned for searches"""530for url in urls:531if (532self.to_scan is None # if we have already "gone online"533or not URL_SCHEME(url) # or it's a local file/directory534or url.startswith('file:')535or list(distros_for_url(url)) # or a direct package link536):537# then go ahead and process it now538self.scan_url(url)539else:540# otherwise, defer retrieval till later541self.to_scan.append(url)542543def prescan(self):544"""Scan urls scheduled for prescanning (e.g. --find-links)"""545if self.to_scan:546list(map(self.scan_url, self.to_scan))547self.to_scan = None # from now on, go ahead and process immediately548549def not_found_in_index(self, requirement):550if self[requirement.key]: # we've seen at least one distro551meth, msg = self.info, "Couldn't retrieve index page for %r"552else: # no distros seen for this name, might be misspelled553meth, msg = (554self.warn,555"Couldn't find index page for %r (maybe misspelled?)")556meth(msg, requirement.unsafe_name)557self.scan_all()558559def download(self, spec, tmpdir):560"""Locate and/or download `spec` to `tmpdir`, returning a local path561562`spec` may be a ``Requirement`` object, or a string containing a URL,563an existing local filename, or a project/version requirement spec564(i.e. the string form of a ``Requirement`` object). 
If it is the URL565of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one566that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is567automatically created alongside the downloaded file.568569If `spec` is a ``Requirement`` object or a string containing a570project/version requirement spec, this method returns the location of571a matching distribution (possibly after downloading it to `tmpdir`).572If `spec` is a locally existing file or directory name, it is simply573returned unchanged. If `spec` is a URL, it is downloaded to a subpath574of `tmpdir`, and the local filename is returned. Various errors may be575raised if a problem occurs during downloading.576"""577if not isinstance(spec, Requirement):578scheme = URL_SCHEME(spec)579if scheme:580# It's a url, download it to tmpdir581found = self._download_url(scheme.group(1), spec, tmpdir)582base, fragment = egg_info_for_url(spec)583if base.endswith('.py'):584found = self.gen_setup(found, fragment, tmpdir)585return found586elif os.path.exists(spec):587# Existing file or directory, just return it588return spec589else:590spec = parse_requirement_arg(spec)591return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)592593def fetch_distribution(594self, requirement, tmpdir, force_scan=False, source=False,595develop_ok=False, local_index=None):596"""Obtain a distribution suitable for fulfilling `requirement`597598`requirement` must be a ``pkg_resources.Requirement`` instance.599If necessary, or if the `force_scan` flag is set, the requirement is600searched for in the (online) package index as well as the locally601installed packages. If a distribution matching `requirement` is found,602the returned distribution's ``location`` is the value you would have603gotten from calling the ``download()`` method with the matching604distribution's URL or filename. 
If no matching distribution is found,605``None`` is returned.606607If the `source` flag is set, only source distributions and source608checkout links will be considered. Unless the `develop_ok` flag is609set, development and system eggs (i.e., those using the ``.egg-info``610format) will be ignored.611"""612# process a Requirement613self.info("Searching for %s", requirement)614skipped = {}615dist = None616617def find(req, env=None):618if env is None:619env = self620# Find a matching distribution; may be called more than once621622for dist in env[req.key]:623624if dist.precedence == DEVELOP_DIST and not develop_ok:625if dist not in skipped:626self.warn(627"Skipping development or system egg: %s", dist,628)629skipped[dist] = 1630continue631632test = (633dist in req634and (dist.precedence <= SOURCE_DIST or not source)635)636if test:637loc = self.download(dist.location, tmpdir)638dist.download_location = loc639if os.path.exists(dist.download_location):640return dist641642if force_scan:643self.prescan()644self.find_packages(requirement)645dist = find(requirement)646647if not dist and local_index is not None:648dist = find(requirement, local_index)649650if dist is None:651if self.to_scan is not None:652self.prescan()653dist = find(requirement)654655if dist is None and not force_scan:656self.find_packages(requirement)657dist = find(requirement)658659if dist is None:660self.warn(661"No local packages or working download links found for %s%s",662(source and "a source distribution of " or ""),663requirement,664)665else:666self.info("Best match: %s", dist)667return dist.clone(location=dist.download_location)668669def fetch(self, requirement, tmpdir, force_scan=False, source=False):670"""Obtain a file suitable for fulfilling `requirement`671672DEPRECATED; use the ``fetch_distribution()`` method now instead. 
For673backward compatibility, this routine is identical but returns the674``location`` of the downloaded distribution instead of a distribution675object.676"""677dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)678if dist is not None:679return dist.location680return None681682def gen_setup(self, filename, fragment, tmpdir):683match = EGG_FRAGMENT.match(fragment)684dists = match and [685d for d in686interpret_distro_name(filename, match.group(1), None) if d.version687] or []688689if len(dists) == 1: # unambiguous ``#egg`` fragment690basename = os.path.basename(filename)691692# Make sure the file has been downloaded to the temp dir.693if os.path.dirname(filename) != tmpdir:694dst = os.path.join(tmpdir, basename)695from setuptools.command.easy_install import samefile696if not samefile(filename, dst):697shutil.copy2(filename, dst)698filename = dst699700with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:701file.write(702"from setuptools import setup\n"703"setup(name=%r, version=%r, py_modules=[%r])\n"704% (705dists[0].project_name, dists[0].version,706os.path.splitext(basename)[0]707)708)709return filename710711elif match:712raise DistutilsError(713"Can't unambiguously interpret project/version identifier %r; "714"any dashes in the name or version should be escaped using "715"underscores. 
%r" % (fragment, dists)716)717else:718raise DistutilsError(719"Can't process plain .py files without an '#egg=name-version'"720" suffix to enable automatic setup script generation."721)722723dl_blocksize = 8192724725def _download_to(self, url, filename):726self.info("Downloading %s", url)727# Download the file728fp = None729try:730checker = HashChecker.from_url(url)731fp = self.open_url(url)732if isinstance(fp, urllib.error.HTTPError):733raise DistutilsError(734"Can't download %s: %s %s" % (url, fp.code, fp.msg)735)736headers = fp.info()737blocknum = 0738bs = self.dl_blocksize739size = -1740if "content-length" in headers:741# Some servers return multiple Content-Length headers :(742sizes = get_all_headers(headers, 'Content-Length')743size = max(map(int, sizes))744self.reporthook(url, filename, blocknum, bs, size)745with open(filename, 'wb') as tfp:746while True:747block = fp.read(bs)748if block:749checker.feed(block)750tfp.write(block)751blocknum += 1752self.reporthook(url, filename, blocknum, bs, size)753else:754break755self.check_hash(checker, filename, tfp)756return headers757finally:758if fp:759fp.close()760761def reporthook(self, url, filename, blocknum, blksize, size):762pass # no-op763764def open_url(self, url, warning=None):765if url.startswith('file:'):766return local_open(url)767try:768return open_with_auth(url, self.opener)769except (ValueError, http_client.InvalidURL) as v:770msg = ' '.join([str(arg) for arg in v.args])771if warning:772self.warn(warning, msg)773else:774raise DistutilsError('%s %s' % (url, msg)) from v775except urllib.error.HTTPError as v:776return v777except urllib.error.URLError as v:778if warning:779self.warn(warning, v.reason)780else:781raise DistutilsError("Download error for %s: %s"782% (url, v.reason)) from v783except http_client.BadStatusLine as v:784if warning:785self.warn(warning, v.line)786else:787raise DistutilsError(788'%s returned a bad status line. 
The server might be '789'down, %s' %790(url, v.line)791) from v792except (http_client.HTTPException, socket.error) as v:793if warning:794self.warn(warning, v)795else:796raise DistutilsError("Download error for %s: %s"797% (url, v)) from v798799def _download_url(self, scheme, url, tmpdir):800# Determine download filename801#802name, fragment = egg_info_for_url(url)803if name:804while '..' in name:805name = name.replace('..', '.').replace('\\', '_')806else:807name = "__downloaded__" # default if URL has no path contents808809if name.endswith('.egg.zip'):810name = name[:-4] # strip the extra .zip before download811812filename = os.path.join(tmpdir, name)813814# Download the file815#816if scheme == 'svn' or scheme.startswith('svn+'):817return self._download_svn(url, filename)818elif scheme == 'git' or scheme.startswith('git+'):819return self._download_git(url, filename)820elif scheme.startswith('hg+'):821return self._download_hg(url, filename)822elif scheme == 'file':823return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])824else:825self.url_ok(url, True) # raises error if not allowed826return self._attempt_download(url, filename)827828def scan_url(self, url):829self.process_url(url, True)830831def _attempt_download(self, url, filename):832headers = self._download_to(url, filename)833if 'html' in headers.get('content-type', '').lower():834return self._download_html(url, headers, filename)835else:836return filename837838def _download_html(self, url, headers, filename):839file = open(filename)840for line in file:841if line.strip():842# Check for a subversion index page843if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):844# it's a subversion index page:845file.close()846os.unlink(filename)847return self._download_svn(url, filename)848break # not an index page849file.close()850os.unlink(filename)851raise DistutilsError("Unexpected HTML page found at " + url)852853def _download_svn(self, url, filename):854warnings.warn("SVN download support is 
deprecated", UserWarning)855url = url.split('#', 1)[0] # remove any fragment for svn's sake856creds = ''857if url.lower().startswith('svn:') and '@' in url:858scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)859if not netloc and path.startswith('//') and '/' in path[2:]:860netloc, path = path[2:].split('/', 1)861auth, host = _splituser(netloc)862if auth:863if ':' in auth:864user, pw = auth.split(':', 1)865creds = " --username=%s --password=%s" % (user, pw)866else:867creds = " --username=" + auth868netloc = host869parts = scheme, netloc, url, p, q, f870url = urllib.parse.urlunparse(parts)871self.info("Doing subversion checkout from %s to %s", url, filename)872os.system("svn checkout%s -q %s %s" % (creds, url, filename))873return filename874875@staticmethod876def _vcs_split_rev_from_url(url, pop_prefix=False):877scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)878879scheme = scheme.split('+', 1)[-1]880881# Some fragment identification fails882path = path.split('#', 1)[0]883884rev = None885if '@' in path:886path, rev = path.rsplit('@', 1)887888# Also, discard fragment889url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))890891return url, rev892893def _download_git(self, url, filename):894filename = filename.split('#', 1)[0]895url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)896897self.info("Doing git clone from %s to %s", url, filename)898os.system("git clone --quiet %s %s" % (url, filename))899900if rev is not None:901self.info("Checking out %s", rev)902os.system("git -C %s checkout --quiet %s" % (903filename,904rev,905))906907return filename908909def _download_hg(self, url, filename):910filename = filename.split('#', 1)[0]911url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)912913self.info("Doing hg clone from %s to %s", url, filename)914os.system("hg clone --quiet %s %s" % (url, filename))915916if rev is not None:917self.info("Updating to %s", rev)918os.system("hg --cwd %s up -C -r %s -q" % 
(919filename,920rev,921))922923return filename924925def debug(self, msg, *args):926log.debug(msg, *args)927928def info(self, msg, *args):929log.info(msg, *args)930931def warn(self, msg, *args):932log.warn(msg, *args)933934935# This pattern matches a character entity reference (a decimal numeric936# references, a hexadecimal numeric reference, or a named reference).937entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub938939940def decode_entity(match):941what = match.group(0)942return unescape(what)943944945def htmldecode(text):946"""947Decode HTML entities in the given text.948949>>> htmldecode(950... 'https://../package_name-0.1.2.tar.gz'951... '?tokena=A&tokenb=B">package_name-0.1.2.tar.gz')952'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'953"""954return entity_sub(decode_entity, text)955956957def socket_timeout(timeout=15):958def _socket_timeout(func):959def _socket_timeout(*args, **kwargs):960old_timeout = socket.getdefaulttimeout()961socket.setdefaulttimeout(timeout)962try:963return func(*args, **kwargs)964finally:965socket.setdefaulttimeout(old_timeout)966967return _socket_timeout968969return _socket_timeout970971972def _encode_auth(auth):973"""974A function compatible with Python 2.3-3.3 that will encode975auth from a URL suitable for an HTTP header.976>>> str(_encode_auth('username%3Apassword'))977'dXNlcm5hbWU6cGFzc3dvcmQ='978979Long auth strings should not cause a newline to be inserted.980>>> long_auth = 'username:' + 'password'*10981>>> chr(10) in str(_encode_auth(long_auth))982False983"""984auth_s = urllib.parse.unquote(auth)985# convert to bytes986auth_bytes = auth_s.encode()987encoded_bytes = base64.b64encode(auth_bytes)988# convert back to a string989encoded = encoded_bytes.decode()990# strip the trailing carriage return991return encoded.replace('\n', '')992993994class Credential:995"""996A username/password pair. 
Use like a namedtuple.997"""998999def __init__(self, username, password):1000self.username = username1001self.password = password10021003def __iter__(self):1004yield self.username1005yield self.password10061007def __str__(self):1008return '%(username)s:%(password)s' % vars(self)100910101011class PyPIConfig(configparser.RawConfigParser):1012def __init__(self):1013"""1014Load from ~/.pypirc1015"""1016defaults = dict.fromkeys(['username', 'password', 'repository'], '')1017configparser.RawConfigParser.__init__(self, defaults)10181019rc = os.path.join(os.path.expanduser('~'), '.pypirc')1020if os.path.exists(rc):1021self.read(rc)10221023@property1024def creds_by_repository(self):1025sections_with_repositories = [1026section for section in self.sections()1027if self.get(section, 'repository').strip()1028]10291030return dict(map(self._get_repo_cred, sections_with_repositories))10311032def _get_repo_cred(self, section):1033repo = self.get(section, 'repository').strip()1034return repo, Credential(1035self.get(section, 'username').strip(),1036self.get(section, 'password').strip(),1037)10381039def find_credential(self, url):1040"""1041If the URL indicated appears to be a repository defined in this1042config, return the credential for that repository.1043"""1044for repository, cred in self.creds_by_repository.items():1045if url.startswith(repository):1046return cred104710481049def open_with_auth(url, opener=urllib.request.urlopen):1050"""Open a urllib2 request, handling HTTP authentication"""10511052parsed = urllib.parse.urlparse(url)1053scheme, netloc, path, params, query, frag = parsed10541055# Double scheme does not raise on macOS as revealed by a1056# failing test. We would expect "nonnumeric port". 
Refs #20.1057if netloc.endswith(':'):1058raise http_client.InvalidURL("nonnumeric port: ''")10591060if scheme in ('http', 'https'):1061auth, address = _splituser(netloc)1062else:1063auth = None10641065if not auth:1066cred = PyPIConfig().find_credential(url)1067if cred:1068auth = str(cred)1069info = cred.username, url1070log.info('Authenticating as %s for %s (from .pypirc)', *info)10711072if auth:1073auth = "Basic " + _encode_auth(auth)1074parts = scheme, address, path, params, query, frag1075new_url = urllib.parse.urlunparse(parts)1076request = urllib.request.Request(new_url)1077request.add_header("Authorization", auth)1078else:1079request = urllib.request.Request(url)10801081request.add_header('User-Agent', user_agent)1082fp = opener(request)10831084if auth:1085# Put authentication info back into request URL if same host,1086# so that links found on the page will work1087s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)1088if s2 == scheme and h2 == address:1089parts = s2, netloc, path2, param2, query2, frag21090fp.url = urllib.parse.urlunparse(parts)10911092return fp109310941095# copy of urllib.parse._splituser from Python 3.81096def _splituser(host):1097"""splituser('user[:passwd]@host[:port]')1098--> 'user[:passwd]', 'host[:port]'."""1099user, delim, host = host.rpartition('@')1100return (user if delim else None), host110111021103# adding a timeout to avoid freezing package_index1104open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)110511061107def fix_sf_url(url):1108return url # backward compatibility110911101111def local_open(url):1112"""Read a local path, with special support for directories"""1113scheme, server, path, param, query, frag = urllib.parse.urlparse(url)1114filename = urllib.request.url2pathname(path)1115if os.path.isfile(filename):1116return urllib.request.urlopen(url)1117elif path.endswith('/') and os.path.isdir(filename):1118files = []1119for f in os.listdir(filename):1120filepath = os.path.join(filename, 
f)1121if f == 'index.html':1122with open(filepath, 'r') as fp:1123body = fp.read()1124break1125elif os.path.isdir(filepath):1126f += '/'1127files.append('<a href="{name}">{name}</a>'.format(name=f))1128else:1129tmpl = (1130"<html><head><title>{url}</title>"1131"</head><body>{files}</body></html>")1132body = tmpl.format(url=url, files='\n'.join(files))1133status, message = 200, "OK"1134else:1135status, message, body = 404, "Path not found", "Not found"11361137headers = {'content-type': 'text/html'}1138body_stream = six.StringIO(body)1139return urllib.error.HTTPError(url, status, message, headers, body_stream)114011411142