Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
keewenaw
GitHub Repository: keewenaw/ethereum-wallet-cracker
Path: blob/main/test/lib/python3.9/site-packages/pip/_internal/models/link.py
4804 views
1
import functools
2
import logging
3
import os
4
import posixpath
5
import re
6
import urllib.parse
7
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
8
9
from pip._internal.utils.filetypes import WHEEL_EXTENSION
10
from pip._internal.utils.hashes import Hashes
11
from pip._internal.utils.misc import (
12
redact_auth_from_url,
13
split_auth_from_netloc,
14
splitext,
15
)
16
from pip._internal.utils.models import KeyBasedCompareMixin
17
from pip._internal.utils.urls import path_to_url, url_to_path
18
19
if TYPE_CHECKING:
20
from pip._internal.index.collector import HTMLPage
21
22
# Module-level logger namespaced to this module's import path.
logger = logging.getLogger(__name__)
23
24
25
# Hash algorithm names recognized in URL fragments (e.g. "#sha256=<hexdigest>").
# Used both to build Link._hash_re and to filter fragment keys in _clean_link().
# NOTE(review): the tuple order feeds the regex alternation in Link._hash_re —
# keep it unchanged unless that pattern is reviewed too.
_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")
26
27
28
class Link(KeyBasedCompareMixin):
    """A single parsed link (an anchor's href) from a package index simple page."""

    # Fix the attribute set; instances carry no per-instance __dict__.
    __slots__ = [
        "_parsed_url",
        "_url",
        "comes_from",
        "requires_python",
        "yanked_reason",
        "cache_link_parsing",
    ]

    def __init__(
        self,
        url: str,
        comes_from: Optional[Union[str, "HTMLPage"]] = None,
        requires_python: Optional[str] = None,
        yanked_reason: Optional[str] = None,
        cache_link_parsing: bool = True,
    ) -> None:
        """
        :param url: url of the resource pointed to (href of the link)
        :param comes_from: instance of HTMLPage where the link was found,
            or string.
        :param requires_python: String containing the `Requires-Python`
            metadata field, specified in PEP 345. This may be specified by
            a data-requires-python attribute in the HTML link tag, as
            described in PEP 503.
        :param yanked_reason: the reason the file has been yanked, if the
            file has been yanked, or None if the file hasn't been yanked.
            This is the value of the "data-yanked" attribute, if present, in
            a simple repository HTML link. If the file has been yanked but
            no reason was provided, this should be the empty string. See
            PEP 592 for more information and the specification.
        :param cache_link_parsing: A flag that is used elsewhere to determine
            whether resources retrieved from this link should be cached. PyPI
            index urls should generally have this set to False, for example.
        """
        # A Windows UNC share (\\host\share) is not a URL; turn it into one.
        if url.startswith("\\\\"):
            url = path_to_url(url)

        self._parsed_url = urllib.parse.urlsplit(url)
        # Private on purpose: callers must not rebind the URL after construction.
        self._url = url

        self.comes_from = comes_from
        # Normalize a falsy (e.g. empty-string) requires_python to None.
        self.requires_python = requires_python or None
        self.yanked_reason = yanked_reason

        super().__init__(key=url, defining_class=Link)

        self.cache_link_parsing = cache_link_parsing

    def __str__(self) -> str:
        rp = f" (requires-python:{self.requires_python})" if self.requires_python else ""
        if self.comes_from:
            return f"{redact_auth_from_url(self._url)} (from {self.comes_from}){rp}"
        return redact_auth_from_url(str(self._url))

    def __repr__(self) -> str:
        return f"<Link {self}>"

    @property
    def url(self) -> str:
        # Read-only view of the original URL string.
        return self._url

    @property
    def filename(self) -> str:
        """Last path component of the URL, unquoted; falls back to the host."""
        trimmed = self.path.rstrip("/")
        base = posixpath.basename(trimmed)
        if not base:
            # No path component: return the host, stripped of any user:password
            # so that auth information cannot leak out of this property.
            host, _ = split_auth_from_netloc(self.netloc)
            return host

        base = urllib.parse.unquote(base)
        assert base, f"URL {self._url!r} produced no filename"
        return base

    @property
    def file_path(self) -> str:
        # Convert a file: URL into a local filesystem path.
        return url_to_path(self.url)

    @property
    def scheme(self) -> str:
        return self._parsed_url.scheme

    @property
    def netloc(self) -> str:
        """
        This can contain auth information.
        """
        return self._parsed_url.netloc

    @property
    def path(self) -> str:
        # Unquoted path portion of the URL.
        return urllib.parse.unquote(self._parsed_url.path)

    def splitext(self) -> Tuple[str, str]:
        """Return (stem, extension) of the URL's final path component."""
        return splitext(posixpath.basename(self.path.rstrip("/")))

    @property
    def ext(self) -> str:
        # Extension only, e.g. ".whl" or ".tar.gz".
        return self.splitext()[1]

    @property
    def url_without_fragment(self) -> str:
        # Same URL with the "#..." fragment dropped.
        return urllib.parse.urlunsplit(self._parsed_url._replace(fragment=""))

    _egg_fragment_re = re.compile(r"[#&]egg=([^&]*)")

    @property
    def egg_fragment(self) -> Optional[str]:
        """Value of the "egg=" fragment, or None when absent."""
        m = self._egg_fragment_re.search(self._url)
        return m.group(1) if m else None

    _subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")

    @property
    def subdirectory_fragment(self) -> Optional[str]:
        """Value of the "subdirectory=" fragment, or None when absent."""
        m = self._subdirectory_fragment_re.search(self._url)
        return m.group(1) if m else None

    _hash_re = re.compile(
        r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
    )

    @property
    def hash(self) -> Optional[str]:
        """Hex digest embedded in the URL fragment, or None."""
        m = self._hash_re.search(self._url)
        return m.group(2) if m else None

    @property
    def hash_name(self) -> Optional[str]:
        """Name of the hash algorithm embedded in the URL fragment, or None."""
        m = self._hash_re.search(self._url)
        return m.group(1) if m else None

    @property
    def show_url(self) -> str:
        # Display form: basename with query string and fragment removed.
        return posixpath.basename(self._url.split("#", 1)[0].split("?", 1)[0])

    @property
    def is_file(self) -> bool:
        return self.scheme == "file"

    def is_existing_dir(self) -> bool:
        # True only for file: URLs whose path is a directory on disk.
        return self.is_file and os.path.isdir(self.file_path)

    @property
    def is_wheel(self) -> bool:
        return self.ext == WHEEL_EXTENSION

    @property
    def is_vcs(self) -> bool:
        # Imported lazily here rather than at module level — presumably to
        # avoid an import cycle with pip._internal.vcs; confirm before moving.
        from pip._internal.vcs import vcs

        return self.scheme in vcs.all_schemes

    @property
    def is_yanked(self) -> bool:
        # A non-None yanked_reason (even "") marks the file as yanked (PEP 592).
        return self.yanked_reason is not None

    @property
    def has_hash(self) -> bool:
        return self.hash_name is not None

    def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
        """
        Return True if the link has a hash and it is allowed.
        """
        if hashes is None or not self.has_hash:
            return False
        # Narrow Optional[str] to str for mypy before the call below.
        assert self.hash_name is not None
        assert self.hash is not None

        return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
227
228
class _CleanResult(NamedTuple):
229
"""Convert link for equivalency check.
230
231
This is used in the resolver to check whether two URL-specified requirements
232
likely point to the same distribution and can be considered equivalent. This
233
equivalency logic avoids comparing URLs literally, which can be too strict
234
(e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users.
235
236
Currently this does three things:
237
238
1. Drop the basic auth part. This is technically wrong since a server can
239
serve different content based on auth, but if it does that, it is even
240
impossible to guarantee two URLs without auth are equivalent, since
241
the user can input different auth information when prompted. So the
242
practical solution is to assume the auth doesn't affect the response.
243
2. Parse the query to avoid the ordering issue. Note that ordering under the
244
same key in the query are NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are
245
still considered different.
246
3. Explicitly drop most of the fragment part, except ``subdirectory=`` and
247
hash values, since it should have no impact the downloaded content. Note
248
that this drops the "egg=" part historically used to denote the requested
249
project (and extras), which is wrong in the strictest sense, but too many
250
people are supplying it inconsistently to cause superfluous resolution
251
conflicts, so we choose to also ignore them.
252
"""
253
254
parsed: urllib.parse.SplitResult
255
query: Dict[str, List[str]]
256
subdirectory: str
257
hashes: Dict[str, str]
258
259
260
def _clean_link(link: Link) -> _CleanResult:
    """Reduce *link* to the canonical form compared by links_equivalent()."""
    parsed = link._parsed_url
    # Drop any "user:password@" prefix from the network location.
    host = parsed.netloc.rsplit("@", 1)[-1]
    # Per RFC 8089, an empty host in a file: URL means localhost.
    if parsed.scheme == "file" and not host:
        host = "localhost"
    frag = urllib.parse.parse_qs(parsed.fragment)
    if "egg" in frag:
        logger.debug("Ignoring egg= fragment in %s", link)
    try:
        # With multiple subdirectory values, keep only the first — this
        # mirrors Link.subdirectory_fragment.
        subdirectory = frag["subdirectory"][0]
    except (IndexError, KeyError):
        subdirectory = ""
    # Likewise keep only the first digest per recognized hash algorithm.
    hashes = {algo: frag[algo][0] for algo in _SUPPORTED_HASHES if algo in frag}
    return _CleanResult(
        parsed=parsed._replace(netloc=host, query="", fragment=""),
        query=urllib.parse.parse_qs(parsed.query),
        subdirectory=subdirectory,
        hashes=hashes,
    )
285
286
@functools.lru_cache(maxsize=None)
def links_equivalent(link1: Link, link2: Link) -> bool:
    """Return True when the two links likely point at the same distribution."""
    cleaned1 = _clean_link(link1)
    cleaned2 = _clean_link(link2)
    return cleaned1 == cleaned2