# Path: blob/master/venv/Lib/site-packages/pip/_internal/cache.py
# 811 views
"""Cache Management
"""

# The following comment should be removed at some point in the future.
# mypy: strict-optional=False

import hashlib
import json
import logging
import os

from pip._vendor.packaging.tags import interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name

from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
from pip._internal.utils.urls import path_to_url

if MYPY_CHECK_RUNNING:
    from typing import Optional, Set, List, Any, Dict

    from pip._vendor.packaging.tags import Tag

    from pip._internal.models.format_control import FormatControl

logger = logging.getLogger(__name__)


def _hash_dict(d):
    # type: (Dict[str, str]) -> str
    """Return a stable sha224 of a dictionary.

    The dictionary is serialized as compact JSON with sorted keys, so the
    digest does not depend on the order in which keys were inserted.
    """
    s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
    return hashlib.sha224(s.encode("ascii")).hexdigest()


def _nest_hash(hashed):
    # type: (str) -> List[str]
    """Split a hex digest into nested directory name parts.

    We want to nest the directories some to prevent having a ton of top
    level directories where we might run out of sub directories on some FS.
    """
    return [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]


class Cache(object):
    """An abstract class - provides cache directories for data from links

    Subclasses must implement ``get_path_for_link``,
    ``get_path_for_link_legacy`` and ``get``.

    :param cache_dir: The root of the cache. Must be an absolute path, or
        empty/None (stored as None) to disable cache lookups.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(self, cache_dir, format_control, allowed_formats):
        # type: (str, FormatControl, Set[str]) -> None
        super(Cache, self).__init__()
        assert not cache_dir or os.path.isabs(cache_dir)
        # Normalize any falsy cache_dir (e.g. "") to None.
        self.cache_dir = cache_dir or None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        # allowed_formats must be a subset of the valid formats: adding the
        # valid formats to it must not introduce anything new.
        _valid_formats = {"source", "binary"}
        assert self.allowed_formats.union(_valid_formats) == _valid_formats

    def _get_cache_path_parts_legacy(self, link):
        # type: (Link) -> List[str]
        """Get the path parts that must be os.path.joined with cache_dir

        Legacy cache key (pip < 20) for compatibility with older caches.
        """

        # We want to generate an url to use as our cache key, we don't want to
        # just re-use the URL because it might have other items in the fragment
        # and we don't care about those.
        key_parts = [link.url_without_fragment]
        if link.hash_name is not None and link.hash is not None:
            key_parts.append("=".join([link.hash_name, link.hash]))
        key_url = "#".join(key_parts)

        # Encode our key url with sha224. It is in the same family as sha256
        # but has a shorter total output; the smaller digest does not make a
        # lot of difference for our use case here.
        hashed = hashlib.sha224(key_url.encode()).hexdigest()

        return _nest_hash(hashed)

    def _get_cache_path_parts(self, link):
        # type: (Link) -> List[str]
        """Get the path parts that must be os.path.joined with cache_dir
        """

        # We want to generate an url to use as our cache key, we don't want to
        # just re-use the URL because it might have other items in the fragment
        # and we don't care about those.
        key_parts = {"url": link.url_without_fragment}
        if link.hash_name is not None and link.hash is not None:
            key_parts[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        # Include interpreter name, major and minor version in cache key
        # to cope with ill-behaved sdists that build a different wheel
        # depending on the python version their setup.py is being run on,
        # and don't encode the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # Hash the key parts with sha224. It is in the same family as sha256
        # but has a shorter total output; the smaller digest does not make a
        # lot of difference for our use case here.
        hashed = _hash_dict(key_parts)

        return _nest_hash(hashed)

    def _get_candidates(self, link, canonical_package_name):
        # type: (Link, Optional[str]) -> List[Any]
        """Return ``(file name, directory)`` pairs cached for ``link``.

        An empty list is returned when there is no cache directory, no
        package name or no link, or when format control does not allow any
        of this cache's formats for the package. Entries from the current
        cache location come first, followed by those from the legacy one.
        """
        can_not_cache = (
            not self.cache_dir or
            not canonical_package_name or
            not link
        )
        if can_not_cache:
            return []

        formats = self.format_control.get_allowed_formats(
            canonical_package_name
        )
        if not self.allowed_formats.intersection(formats):
            return []

        candidates = []
        path = self.get_path_for_link(link)
        if os.path.isdir(path):
            candidates.extend(
                (candidate, path) for candidate in os.listdir(path)
            )
        # TODO remove legacy path lookup in pip>=21
        legacy_path = self.get_path_for_link_legacy(link)
        if os.path.isdir(legacy_path):
            candidates.extend(
                (candidate, legacy_path)
                for candidate in os.listdir(legacy_path)
            )
        return candidates

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy (pip < 20) cache directory for link.
        """
        raise NotImplementedError()

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached items in for link.
        """
        raise NotImplementedError()

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.
        """
        raise NotImplementedError()


class SimpleWheelCache(Cache):
    """A cache of wheels for future installs.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(SimpleWheelCache, self).__init__(
            cache_dir, format_control, {"binary"}
        )

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy (pip < 20) directory of cached wheels for link.
        """
        parts = self._get_cache_path_parts_legacy(link)
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        parts = self._get_cache_path_parts(link)

        # Store wheels within the root cache_dir
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to the best matching cached wheel for ``link``.

        The passed link is returned unchanged (the same object) when no
        package name is given or when no cached wheel matches the package
        name and the supported tags.
        """
        if not package_name:
            return link

        candidates = []
        canonical_package_name = canonicalize_name(package_name)
        for wheel_name, wheel_dir in self._get_candidates(
            link, canonical_package_name
        ):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                # Not a wheel file name; ignore this cache entry.
                continue
            if canonicalize_name(wheel.name) != canonical_package_name:
                logger.debug(
                    "Ignoring cached wheel {} for {} as it "
                    "does not match the expected distribution name {}.".format(
                        wheel_name, link, package_name
                    )
                )
                continue
            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue
            candidates.append(
                (
                    wheel.support_index_min(supported_tags),
                    wheel_name,
                    wheel_dir,
                )
            )

        if not candidates:
            return link

        # The smallest support index wins; ties fall back to comparing the
        # wheel name and then the directory.
        _, wheel_name, wheel_dir = min(candidates)
        return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))


class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory
    """

    def __init__(self, format_control):
        # type: (FormatControl) -> None
        self._temp_dir = TempDirectory(
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
            globally_managed=True,
        )

        super(EphemWheelCache, self).__init__(
            self._temp_dir.path, format_control
        )


class CacheEntry(object):
    """A cached link together with the kind of cache it was found in.

    :param link: The link to the cached item.
    :param persistent: True if the item came from the persistent wheel
        cache, False if it came from the ephemeral one.
    """

    def __init__(
        self,
        link,        # type: Link
        persistent,  # type: bool
    ):
        # type: (...) -> None
        self.link = link
        self.persistent = persistent


class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation, using the ephem wheel cache
    when a certain link is not found in the simple wheel cache first.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(WheelCache, self).__init__(
            cache_dir, format_control, {'binary'}
        )
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy directory for link in the simple wheel cache.
        """
        return self._wheel_cache.get_path_for_link_legacy(link)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory for link in the simple wheel cache.
        """
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory for link in the ephemeral wheel cache.
        """
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.
        """
        cache_entry = self.get_cache_entry(link, package_name, supported_tags)
        if cache_entry is None:
            return link
        return cache_entry.link

    def get_cache_entry(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Optional[CacheEntry]
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        # The wrapped caches hand back the very same link object on a miss,
        # so an identity check is what distinguishes a hit from a miss.
        retval = self._wheel_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        if retval is not link:
            return CacheEntry(retval, persistent=True)

        retval = self._ephem_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        if retval is not link:
            return CacheEntry(retval, persistent=False)

        return None