# Path: blob/main/test/lib/python3.9/site-packages/pip/_internal/models/link.py
# 4804 views
import functools
import logging
import os
import posixpath
import re
import urllib.parse
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union

from pip._internal.utils.filetypes import WHEEL_EXTENSION
from pip._internal.utils.hashes import Hashes
from pip._internal.utils.misc import (
    redact_auth_from_url,
    split_auth_from_netloc,
    splitext,
)
from pip._internal.utils.models import KeyBasedCompareMixin
from pip._internal.utils.urls import path_to_url, url_to_path

if TYPE_CHECKING:
    from pip._internal.index.collector import HTMLPage

logger = logging.getLogger(__name__)


# Hash algorithm names recognised in link URLs (see Link._hash_re and
# _clean_link below).
_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")


class Link(KeyBasedCompareMixin):
    """Represents a parsed link from a Package Index's simple URL"""

    __slots__ = [
        "_parsed_url",
        "_url",
        "comes_from",
        "requires_python",
        "yanked_reason",
        "cache_link_parsing",
    ]

    def __init__(
        self,
        url: str,
        comes_from: Optional[Union[str, "HTMLPage"]] = None,
        requires_python: Optional[str] = None,
        yanked_reason: Optional[str] = None,
        cache_link_parsing: bool = True,
    ) -> None:
        """
        :param url: url of the resource pointed to (href of the link)
        :param comes_from: instance of HTMLPage where the link was found,
            or string.
        :param requires_python: String containing the `Requires-Python`
            metadata field, specified in PEP 345. This may be specified by
            a data-requires-python attribute in the HTML link tag, as
            described in PEP 503.
        :param yanked_reason: the reason the file has been yanked, if the
            file has been yanked, or None if the file hasn't been yanked.
            This is the value of the "data-yanked" attribute, if present, in
            a simple repository HTML link. If the file has been yanked but
            no reason was provided, this should be the empty string. See
            PEP 592 for more information and the specification.
        :param cache_link_parsing: A flag that is used elsewhere to determine
                                   whether resources retrieved from this link
                                   should be cached. PyPI index urls should
                                   generally have this set to False, for
                                   example.
        """

        # url can be a UNC windows share
        if url.startswith("\\\\"):
            url = path_to_url(url)

        self._parsed_url = urllib.parse.urlsplit(url)
        # Store the url as a private attribute to prevent accidentally
        # trying to set a new value.
        self._url = url

        self.comes_from = comes_from
        # An empty requires_python string is normalised to None.
        self.requires_python = requires_python if requires_python else None
        self.yanked_reason = yanked_reason

        super().__init__(key=url, defining_class=Link)

        self.cache_link_parsing = cache_link_parsing

    def __str__(self) -> str:
        """Return the auth-redacted URL, plus its origin when one is known."""
        redacted_url = redact_auth_from_url(self._url)
        if not self.comes_from:
            # The requires-python suffix is only rendered together with the
            # "(from ...)" part; without an origin just the URL is shown.
            return redacted_url
        if self.requires_python:
            rp = f" (requires-python:{self.requires_python})"
        else:
            rp = ""
        return f"{redacted_url} (from {self.comes_from}){rp}"

    def __repr__(self) -> str:
        return f"<Link {self}>"

    @property
    def url(self) -> str:
        """The URL of this link (a UNC path is stored converted to a URL)."""
        return self._url

    @property
    def filename(self) -> str:
        """The unquoted last path segment, or the auth-less netloc if none."""
        path = self.path.rstrip("/")
        name = posixpath.basename(path)
        if not name:
            # Make sure we don't leak auth information if the netloc
            # includes a username and password.
            netloc, _ = split_auth_from_netloc(self.netloc)
            return netloc

        # NOTE: self.path is already unquoted, so this unquotes a second time.
        name = urllib.parse.unquote(name)
        assert name, f"URL {self._url!r} produced no filename"
        return name

    @property
    def file_path(self) -> str:
        """The URL converted to a filesystem path via ``url_to_path``."""
        return url_to_path(self.url)

    @property
    def scheme(self) -> str:
        """The scheme component of the parsed URL."""
        return self._parsed_url.scheme

    @property
    def netloc(self) -> str:
        """
        This can contain auth information.
        """
        return self._parsed_url.netloc

    @property
    def path(self) -> str:
        """The path component of the parsed URL, percent-decoded."""
        return urllib.parse.unquote(self._parsed_url.path)

    def splitext(self) -> Tuple[str, str]:
        """Apply ``splitext`` to the last path segment (trailing "/" ignored)."""
        return splitext(posixpath.basename(self.path.rstrip("/")))

    @property
    def ext(self) -> str:
        """The extension part (second item) of :meth:`splitext`."""
        return self.splitext()[1]

    @property
    def url_without_fragment(self) -> str:
        """The URL reassembled from its parsed parts with an empty fragment."""
        scheme, netloc, path, query, fragment = self._parsed_url
        return urllib.parse.urlunsplit((scheme, netloc, path, query, ""))

    _egg_fragment_re = re.compile(r"[#&]egg=([^&]*)")

    @property
    def egg_fragment(self) -> Optional[str]:
        """The value of the first ``egg=`` entry in the URL, or None."""
        match = self._egg_fragment_re.search(self._url)
        if not match:
            return None
        return match.group(1)

    _subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")

    @property
    def subdirectory_fragment(self) -> Optional[str]:
        """The value of the first ``subdirectory=`` entry in the URL, or None."""
        match = self._subdirectory_fragment_re.search(self._url)
        if not match:
            return None
        return match.group(1)

    # NOTE: unlike the two patterns above, this one is not anchored on "#" or
    # "&" and is searched over the whole URL, so the first "<algo>=<hex>"
    # occurrence anywhere in the URL is what gets reported.
    _hash_re = re.compile(
        r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
    )

    @property
    def hash(self) -> Optional[str]:
        """The hex digest of the first hash found in the URL, or None."""
        match = self._hash_re.search(self._url)
        if match:
            return match.group(2)
        return None

    @property
    def hash_name(self) -> Optional[str]:
        """The algorithm name of the first hash found in the URL, or None."""
        match = self._hash_re.search(self._url)
        if match:
            return match.group(1)
        return None

    @property
    def show_url(self) -> str:
        """The basename of the URL with any fragment and query removed."""
        return posixpath.basename(self._url.split("#", 1)[0].split("?", 1)[0])

    @property
    def is_file(self) -> bool:
        """Whether the URL scheme is ``file``."""
        return self.scheme == "file"

    def is_existing_dir(self) -> bool:
        """Whether this is a ``file`` link whose path is an existing directory."""
        return self.is_file and os.path.isdir(self.file_path)

    @property
    def is_wheel(self) -> bool:
        """Whether the link's extension equals ``WHEEL_EXTENSION``."""
        return self.ext == WHEEL_EXTENSION

    @property
    def is_vcs(self) -> bool:
        """Whether the URL scheme is one of the registered VCS schemes."""
        # Imported at call time rather than module level -- presumably to
        # avoid an import cycle; confirm before hoisting.
        from pip._internal.vcs import vcs

        return self.scheme in vcs.all_schemes

    @property
    def is_yanked(self) -> bool:
        """Whether a yanked reason is set (an empty string counts as yanked)."""
        return self.yanked_reason is not None

    @property
    def has_hash(self) -> bool:
        """Whether a supported hash was found in the URL."""
        return self.hash_name is not None

    def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
        """
        Return True if the link has a hash and it is allowed.
        """
        if hashes is None:
            return False
        # Read each property once; the explicit None checks narrow the types
        # for mypy without relying on asserts (which are stripped under -O).
        hash_name = self.hash_name
        hex_digest = self.hash
        if hash_name is None or hex_digest is None:
            return False

        return hashes.is_hash_allowed(hash_name, hex_digest=hex_digest)


class _CleanResult(NamedTuple):
    """Convert link for equivalency check.

    This is used in the resolver to check whether two URL-specified requirements
    likely point to the same distribution and can be considered equivalent. This
    equivalency logic avoids comparing URLs literally, which can be too strict
    (e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpected by users.

    Currently this does three things:

    1. Drop the basic auth part. This is technically wrong since a server can
       serve different content based on auth, but if it does that, it is even
       impossible to guarantee two URLs without auth are equivalent, since
       the user can input different auth information when prompted. So the
       practical solution is to assume the auth doesn't affect the response.
    2. Parse the query to avoid the ordering issue. Note that ordering under the
       same key in the query are NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are
       still considered different.
    3. Explicitly drop most of the fragment part, except ``subdirectory=`` and
       hash values, since it should have no impact on the downloaded content.
       Note that this drops the "egg=" part historically used to denote the
       requested project (and extras), which is wrong in the strictest sense,
       but too many people are supplying it inconsistently to cause superfluous
       resolution conflicts, so we choose to also ignore them.
    """

    parsed: urllib.parse.SplitResult
    query: Dict[str, List[str]]
    subdirectory: str
    hashes: Dict[str, str]


def _clean_link(link: Link) -> _CleanResult:
    """Reduce *link* to the parts that matter for the equivalency check.

    :param link: the link to normalise; only its parsed URL is read.
    :return: a ``_CleanResult`` holding the URL without auth, query and
        fragment, the parsed query, the first ``subdirectory=`` fragment value
        ("" if absent) and the first fragment value per supported hash.
    """
    parsed = link._parsed_url
    # Keep only what follows the last "@", i.e. drop any auth prefix.
    netloc = parsed.netloc.rsplit("@", 1)[-1]
    # According to RFC 8089, an empty host in file: means localhost.
    if parsed.scheme == "file" and not netloc:
        netloc = "localhost"
    fragment = urllib.parse.parse_qs(parsed.fragment)
    if "egg" in fragment:
        logger.debug("Ignoring egg= fragment in %s", link)
    try:
        # If there are multiple subdirectory values, use the first one.
        # This matches the behavior of Link.subdirectory_fragment.
        subdirectory = fragment["subdirectory"][0]
    except (IndexError, KeyError):
        subdirectory = ""
    # If there are multiple hash values under the same algorithm, use the
    # first one. This matches the behavior of Link.hash.
    hashes = {k: fragment[k][0] for k in _SUPPORTED_HASHES if k in fragment}
    return _CleanResult(
        parsed=parsed._replace(netloc=netloc, query="", fragment=""),
        query=urllib.parse.parse_qs(parsed.query),
        subdirectory=subdirectory,
        hashes=hashes,
    )


# NOTE: the cache is unbounded (maxsize=None), so every pair of links ever
# compared stays referenced for the lifetime of the process.
@functools.lru_cache(maxsize=None)
def links_equivalent(link1: Link, link2: Link) -> bool:
    """Return True if both links normalise to the same ``_CleanResult``."""
    return _clean_link(link1) == _clean_link(link2)