CoCalc -- link.py

GitHub Repository: keewenaw/ethereum-wallet-cracker
Path: blob/main/test/lib/python3.9/site-packages/pip/_internal/models/link.py
⁵¹⁶⁴ views
1
import functools
2
import logging
3
import os
4
import posixpath
5
import re
6
import urllib.parse
7
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
8

9
from pip._internal.utils.filetypes import WHEEL_EXTENSION
10
from pip._internal.utils.hashes import Hashes
11
from pip._internal.utils.misc import (
12
    redact_auth_from_url,
13
    split_auth_from_netloc,
14
    splitext,
15
)
16
from pip._internal.utils.models import KeyBasedCompareMixin
17
from pip._internal.utils.urls import path_to_url, url_to_path
18

19
if TYPE_CHECKING:
20
    from pip._internal.index.collector import HTMLPage
21

22
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Hash algorithm names recognised in link URLs. The order is significant:
# it is the alternation order used when the hash-matching regex is built.
_SUPPORTED_HASHES = (
    "sha1",
    "sha224",
    "sha384",
    "sha256",
    "sha512",
    "md5",
)
26

27

28
class Link(KeyBasedCompareMixin):
    """Represents a parsed link from a Package Index's simple URL"""

    __slots__ = [
        "_parsed_url",
        "_url",
        "comes_from",
        "requires_python",
        "yanked_reason",
        "cache_link_parsing",
    ]

    # These patterns are searched against the raw URL string (self._url).
    # NOTE(review): none of them is restricted to the fragment part, and
    # _hash_re has no leading delimiter, so a match can come from anywhere
    # in the URL -- confirm this is intended before relying on it.
    _egg_fragment_re = re.compile(r"[#&]egg=([^&]*)")
    _subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")
    _hash_re = re.compile(
        r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
    )

    def __init__(
        self,
        url: str,
        comes_from: Optional[Union[str, "HTMLPage"]] = None,
        requires_python: Optional[str] = None,
        yanked_reason: Optional[str] = None,
        cache_link_parsing: bool = True,
    ) -> None:
        """
        :param url: url of the resource pointed to (href of the link)
        :param comes_from: instance of HTMLPage where the link was found,
            or string.
        :param requires_python: String containing the `Requires-Python`
            metadata field, specified in PEP 345. This may be specified by
            a data-requires-python attribute in the HTML link tag, as
            described in PEP 503. An empty value is stored as None.
        :param yanked_reason: the reason the file has been yanked, if the
            file has been yanked, or None if the file hasn't been yanked.
            This is the value of the "data-yanked" attribute, if present, in
            a simple repository HTML link. If the file has been yanked but
            no reason was provided, this should be the empty string. See
            PEP 592 for more information and the specification.
        :param cache_link_parsing: A flag that is used elsewhere to determine
                                   whether resources retrieved from this link
                                   should be cached. PyPI index urls should
                                   generally have this set to False, for
                                   example.
        """

        # url can be a UNC windows share
        if url.startswith("\\\\"):
            url = path_to_url(url)

        self._parsed_url = urllib.parse.urlsplit(url)
        # Store the url as a private attribute to prevent accidentally
        # trying to set a new value.
        self._url = url

        self.comes_from = comes_from
        # Normalise every falsy value (e.g. the empty string) to None.
        self.requires_python = requires_python or None
        self.yanked_reason = yanked_reason

        # The (possibly converted) url is the comparison key.
        super().__init__(key=url, defining_class=Link)

        self.cache_link_parsing = cache_link_parsing

    def __str__(self) -> str:
        """Return the auth-redacted URL, plus origin details when known.

        The ``(from ...)`` and ``(requires-python:...)`` suffixes are only
        emitted when ``comes_from`` is truthy; otherwise just the redacted
        URL is returned.
        """
        redacted = redact_auth_from_url(self._url)
        if not self.comes_from:
            return redacted

        rp = ""
        if self.requires_python:
            rp = f" (requires-python:{self.requires_python})"
        return f"{redacted} (from {self.comes_from}){rp}"

    def __repr__(self) -> str:
        return f"<Link {self}>"

    @property
    def url(self) -> str:
        """The URL as stored at construction time (read-only)."""
        return self._url

    @property
    def filename(self) -> str:
        """Last path component of the URL, or the host if the path is empty.

        Trailing slashes are ignored when picking the last component.
        """
        basename = posixpath.basename(self.path.rstrip("/"))
        if not basename:
            # Make sure we don't leak auth information if the netloc
            # includes a username and password.
            netloc, _auth = split_auth_from_netloc(self.netloc)
            return netloc

        # self.path is already unquoted; this second unquote is kept from
        # the original implementation.
        name = urllib.parse.unquote(basename)
        assert name, f"URL {self._url!r} produced no filename"
        return name

    @property
    def file_path(self) -> str:
        """The URL converted with ``url_to_path``."""
        return url_to_path(self.url)

    @property
    def scheme(self) -> str:
        """Scheme component of the parsed URL."""
        return self._parsed_url.scheme

    @property
    def netloc(self) -> str:
        """
        This can contain auth information.
        """
        return self._parsed_url.netloc

    @property
    def path(self) -> str:
        """Percent-decoded path component of the parsed URL."""
        return urllib.parse.unquote(self._parsed_url.path)

    def splitext(self) -> Tuple[str, str]:
        """Apply ``splitext`` to the last path component.

        Trailing slashes are stripped from the path first.
        """
        return splitext(posixpath.basename(self.path.rstrip("/")))

    @property
    def ext(self) -> str:
        """Second element of the pair returned by :meth:`splitext`."""
        return self.splitext()[1]

    @property
    def url_without_fragment(self) -> str:
        """The URL reassembled from its parsed parts with an empty fragment."""
        scheme, netloc, path, query, _ = self._parsed_url
        return urllib.parse.urlunsplit((scheme, netloc, path, query, ""))

    @property
    def egg_fragment(self) -> Optional[str]:
        """Value of the first ``egg=`` item found in the URL, or None."""
        match = self._egg_fragment_re.search(self._url)
        return match.group(1) if match else None

    @property
    def subdirectory_fragment(self) -> Optional[str]:
        """Value of the first ``subdirectory=`` item in the URL, or None."""
        match = self._subdirectory_fragment_re.search(self._url)
        return match.group(1) if match else None

    @property
    def hash(self) -> Optional[str]:
        """Hex digest of the first ``<algorithm>=<hex>`` match, or None."""
        match = self._hash_re.search(self._url)
        return match.group(2) if match else None

    @property
    def hash_name(self) -> Optional[str]:
        """Algorithm name of the first ``<algorithm>=<hex>`` match, or None."""
        match = self._hash_re.search(self._url)
        return match.group(1) if match else None

    @property
    def show_url(self) -> str:
        """Basename of the raw URL with fragment and query text cut off.

        Everything from the first ``#`` is dropped, then everything from the
        first ``?`` in what remains.
        """
        without_fragment = self._url.split("#", 1)[0]
        without_query = without_fragment.split("?", 1)[0]
        return posixpath.basename(without_query)

    @property
    def is_file(self) -> bool:
        """True when the URL scheme is exactly ``file``."""
        return self.scheme == "file"

    def is_existing_dir(self) -> bool:
        """True for a ``file`` URL whose path is an existing directory."""
        return self.is_file and os.path.isdir(self.file_path)

    @property
    def is_wheel(self) -> bool:
        """True when the extension equals ``WHEEL_EXTENSION``."""
        return self.ext == WHEEL_EXTENSION

    @property
    def is_vcs(self) -> bool:
        """True when the scheme is one of ``vcs.all_schemes``."""
        # Imported at call time rather than at module level; presumably to
        # avoid a circular import -- TODO confirm.
        from pip._internal.vcs import vcs

        return self.scheme in vcs.all_schemes

    @property
    def is_yanked(self) -> bool:
        """True when a yanked reason is set (the empty string counts)."""
        return self.yanked_reason is not None

    @property
    def has_hash(self) -> bool:
        """True when the URL contains a supported ``<algorithm>=<hex>``."""
        return self.hash_name is not None

    def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
        """
        Return True if the link has a hash and it is allowed.

        :param hashes: the allowed hashes; None means nothing is allowed.
        """
        if hashes is None:
            return False
        # Search once and reuse the match for both the algorithm name and
        # the digest instead of re-running the regex per property access.
        match = self._hash_re.search(self._url)
        if match is None:
            return False
        return hashes.is_hash_allowed(match.group(1), hex_digest=match.group(2))
226

227

228
class _CleanResult(NamedTuple):
    """Normalized view of a link, used for equivalency checks.

    The resolver uses this to decide whether two URL-specified requirements
    likely point to the same distribution and can be treated as equivalent.
    Comparing URLs literally would be too strict (e.g. "a=1&b=2" versus
    "b=2&a=1") and would produce conflicts that users do not expect.

    Three normalizations are currently applied:

    1. The basic auth part is dropped. Strictly speaking this is wrong,
       since a server can serve different content depending on auth. But if
       a server does that, two URLs *without* auth cannot be guaranteed
       equivalent either, because the user may enter different credentials
       when prompted. The practical choice is to assume auth does not
       affect the response.
    2. The query is parsed so that the order of keys does not matter. The
       order of values under one key is NOT normalized; "a=1&a=2" and
       "a=2&a=1" are still considered different.
    3. Most of the fragment is dropped, except ``subdirectory=`` and hash
       values, since the rest should have no impact on the downloaded
       content. This also drops the "egg=" part historically used to denote
       the requested project (and extras). That is wrong in the strictest
       sense, but it is supplied so inconsistently that keeping it causes
       superfluous resolution conflicts, so it is ignored as well.
    """

    # Split URL with auth removed and the query and fragment blanked.
    parsed: urllib.parse.SplitResult
    # Parsed query string: key -> list of values.
    query: Dict[str, List[str]]
    # Value of the fragment's ``subdirectory`` item, or "" if absent.
    subdirectory: str
    # Hash algorithm name -> digest taken from the fragment.
    hashes: Dict[str, str]
258

259

260
def _clean_link(link: Link) -> _CleanResult:
    """Reduce *link* to the parts that matter for equivalency comparison.

    Auth information is removed from the netloc, the query is parsed into a
    dict, and only the ``subdirectory`` and supported hash items of the
    fragment are retained.
    """
    split_url = link._parsed_url

    # Keep only what follows the last "@", i.e. discard any user info.
    host = split_url.netloc.rsplit("@", 1)[-1]
    # According to RFC 8089, an empty host in file: means localhost.
    if split_url.scheme == "file" and not host:
        host = "localhost"

    fragment_items = urllib.parse.parse_qs(split_url.fragment)
    if "egg" in fragment_items:
        logger.debug("Ignoring egg= fragment in %s", link)

    # When several subdirectory values are given, only the first is used;
    # a missing (or empty) entry yields the empty string.
    subdirectory_values = fragment_items.get("subdirectory")
    subdirectory = subdirectory_values[0] if subdirectory_values else ""

    # When an algorithm appears more than once, only its first value is kept.
    hashes = {}
    for algorithm in _SUPPORTED_HASHES:
        if algorithm in fragment_items:
            hashes[algorithm] = fragment_items[algorithm][0]

    stripped = split_url._replace(netloc=host, query="", fragment="")
    return _CleanResult(
        parsed=stripped,
        query=urllib.parse.parse_qs(split_url.query),
        subdirectory=subdirectory,
        hashes=hashes,
    )
284

285

286
@functools.lru_cache(maxsize=None)
def links_equivalent(link1: Link, link2: Link) -> bool:
    """Return True when both links have equal cleaned forms.

    Results are memoized per ``(link1, link2)`` argument pair in an
    unbounded cache.
    """
    cleaned_first = _clean_link(link1)
    cleaned_second = _clean_link(link2)
    return cleaned_first == cleaned_second
289

290
Product

Resources

Company