Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/pip/_internal/cache.py
811 views
1
"""Cache Management
2
"""
3
4
# The following comment should be removed at some point in the future.
5
# mypy: strict-optional=False
6
7
import hashlib
8
import json
9
import logging
10
import os
11
12
from pip._vendor.packaging.tags import interpreter_name, interpreter_version
13
from pip._vendor.packaging.utils import canonicalize_name
14
15
from pip._internal.exceptions import InvalidWheelFilename
16
from pip._internal.models.link import Link
17
from pip._internal.models.wheel import Wheel
18
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
19
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
20
from pip._internal.utils.urls import path_to_url
21
22
if MYPY_CHECK_RUNNING:
23
from typing import Optional, Set, List, Any, Dict
24
25
from pip._vendor.packaging.tags import Tag
26
27
from pip._internal.models.format_control import FormatControl
28
29
logger = logging.getLogger(__name__)
30
31
32
def _hash_dict(d):
    # type: (Dict[str, str]) -> str
    """Compute a reproducible sha224 hex digest of a dictionary.

    The dictionary is serialised to compact, ASCII-only JSON with its keys
    sorted, so dictionaries with equal contents always hash to the same
    value regardless of insertion order.
    """
    canonical_json = json.dumps(
        d, ensure_ascii=True, separators=(",", ":"), sort_keys=True
    )
    digest = hashlib.sha224(canonical_json.encode("ascii"))
    return digest.hexdigest()
37
38
39
class Cache(object):
    """Abstract base that maps links onto directories below a cache root.

    This class provides the cache-key computation and the enumeration of
    candidate files; ``get_path_for_link``, ``get_path_for_link_legacy``
    and ``get`` raise NotImplementedError here and are meant to be
    overridden.

    :param cache_dir: The root of the cache. Must be an absolute path when
        given; a falsy value is stored as None.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(self, cache_dir, format_control, allowed_formats):
        # type: (str, FormatControl, Set[str]) -> None
        super(Cache, self).__init__()
        # A relative cache root is a programming error.
        assert not cache_dir or os.path.isabs(cache_dir)
        self.cache_dir = cache_dir or None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        # Only the two known format names may be requested.
        known_formats = {"source", "binary"}
        assert self.allowed_formats.issubset(known_formats)

    @staticmethod
    def _nest_digest(digest):
        # type: (str) -> List[str]
        """Split a hex digest into nested directory name components.

        We want to nest the directories some to prevent having a ton of
        top level directories where we might run out of sub directories
        on some FS.
        """
        return [digest[:2], digest[2:4], digest[4:6], digest[6:]]

    def _get_cache_path_parts_legacy(self, link):
        # type: (Link) -> List[str]
        """Get parts of the path that must be os.path.joined with cache_dir

        Legacy cache key (pip < 20) for compatibility with older caches.
        """
        # The key is the URL without its fragment (the fragment may carry
        # items we do not care about), followed by "#name=value" for the
        # link hash when the link has one.
        url_pieces = [link.url_without_fragment]
        if not (link.hash_name is None or link.hash is None):
            url_pieces.append("=".join([link.hash_name, link.hash]))
        key_url = "#".join(url_pieces)

        # sha224 has similar security properties to sha256 but a shorter
        # total output (and is thus less secure); the difference does not
        # matter much for a cache key.
        digest = hashlib.sha224(key_url.encode()).hexdigest()
        return self._nest_digest(digest)

    def _get_cache_path_parts(self, link):
        # type: (Link) -> List[str]
        """Get parts of the path that must be os.path.joined with cache_dir
        """
        # The key is a dictionary built from the URL without its fragment
        # (the fragment may carry items we do not care about), the link
        # hash when present, and the subdirectory fragment when present.
        key_parts = {"url": link.url_without_fragment}
        if not (link.hash_name is None or link.hash is None):
            key_parts[link.hash_name] = link.hash
        subdirectory = link.subdirectory_fragment
        if subdirectory:
            key_parts["subdirectory"] = subdirectory

        # Include interpreter name, major and minor version in cache key
        # to cope with ill-behaved sdists that build a different wheel
        # depending on the python version their setup.py is being run on,
        # and don't encode the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # The whole dictionary is hashed and the digest is split into
        # nested directory names.
        return self._nest_digest(_hash_dict(key_parts))

    def _get_candidates(self, link, canonical_package_name):
        # type: (Link, Optional[str]) -> List[Any]
        """List ``(file name, directory)`` pairs cached for ``link``.

        Returns an empty list when there is no cache directory, no package
        name or no link, or when none of this cache's allowed formats is
        permitted for the package by ``format_control``.
        """
        if not (self.cache_dir and canonical_package_name and link):
            return []

        permitted = self.format_control.get_allowed_formats(
            canonical_package_name
        )
        if not self.allowed_formats.intersection(permitted):
            return []

        found = []
        # Current layout first, then the legacy layout.
        # TODO remove legacy path lookup in pip>=21
        lookups = (self.get_path_for_link, self.get_path_for_link_legacy)
        for locate in lookups:
            directory = locate(link)
            if os.path.isdir(directory):
                found.extend(
                    (entry, directory) for entry in os.listdir(directory)
                )
        return found

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy-layout directory for link (abstract here)."""
        raise NotImplementedError()

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached items in for link.
        """
        raise NotImplementedError()

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.
        """
        raise NotImplementedError()
173
174
175
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs.

    Wheels are kept below ``<cache_dir>/wheels`` and only the 'binary'
    format is allowed.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(SimpleWheelCache, self).__init__(
            cache_dir, format_control, {"binary"}
        )

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the wheel directory for link under the legacy cache key."""
        parts = self._get_cache_path_parts_legacy(link)
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        parts = self._get_cache_path_parts(link)

        # Store wheels within the root cache_dir
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to the best matching cached wheel, or ``link``.

        :param link: The link to look up in the cache.
        :param package_name: Expected distribution name; when falsy the
            cache is not consulted and ``link`` is returned unchanged.
        :param supported_tags: Tags passed to ``Wheel.supported`` and
            ``Wheel.support_index_min`` to filter and rank candidates.
        :return: A file link to a cached wheel, or the very same ``link``
            object when nothing suitable is cached.
        """
        if not package_name:
            return link

        canonical_package_name = canonicalize_name(package_name)
        candidates = []
        for wheel_name, wheel_dir in self._get_candidates(
            link, canonical_package_name
        ):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                # Not a wheel file name; ignore stray files in the cache.
                continue
            if canonicalize_name(wheel.name) != canonical_package_name:
                # Pass the values as logging arguments so the message is
                # only formatted when debug logging is actually enabled.
                logger.debug(
                    "Ignoring cached wheel %s for %s as it "
                    "does not match the expected distribution name %s.",
                    wheel_name, link, package_name,
                )
                continue
            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue
            candidates.append(
                (
                    wheel.support_index_min(supported_tags),
                    wheel_name,
                    wheel_dir,
                )
            )

        if not candidates:
            return link

        # min() selects the smallest support index; ties are broken by
        # comparing the file name and then the directory.
        _, wheel_name, wheel_dir = min(candidates)
        return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))
255
256
257
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory
    """

    def __init__(self, format_control):
        # type: (FormatControl) -> None
        # Create the temporary directory first: its path becomes the cache
        # root handed to the parent class.
        ephemeral_dir = TempDirectory(
            globally_managed=True,
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
        )
        self._temp_dir = ephemeral_dir

        super(EphemWheelCache, self).__init__(
            ephemeral_dir.path, format_control
        )
271
272
273
class CacheEntry(object):
    """Pairs a link with a flag describing the cache it was found in."""

    def __init__(
        self,
        link,        # type: Link
        persistent,  # type: bool
    ):
        # type: (...) -> None
        # Both values are stored as given, without validation.
        self.link, self.persistent = link, persistent
281
282
283
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation, using the ephem wheel cache
    when a certain link is not found in the simple wheel cache first.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(WheelCache, self).__init__(
            cache_dir, format_control, {'binary'}
        )
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the simple wheel cache's legacy directory for link."""
        return self._wheel_cache.get_path_for_link_legacy(link)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return the simple wheel cache's directory for link."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link):
        # type: (Link) -> str
        """Return the ephem wheel cache's directory for link."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return the cached link for ``link``, or ``link`` itself when
        neither wrapped cache has an entry for it.
        """
        entry = self.get_cache_entry(link, package_name, supported_tags)
        return link if entry is None else entry.link

    def get_cache_entry(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Optional[CacheEntry]
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        # The simple wheel cache is consulted first and reported as
        # persistent; the ephem cache is the fallback.
        lookup_order = (
            (self._wheel_cache, True),
            (self._ephem_cache, False),
        )
        for cache, persistent in lookup_order:
            found = cache.get(
                link=link,
                package_name=package_name,
                supported_tags=supported_tags,
            )
            # A result that is not the passed-in link object is a hit.
            if found is not link:
                return CacheEntry(found, persistent=persistent)

        return None
350
351