Path: blob/master/ invest-robot-contest_TinkoffBotTwitch-main/venv/lib/python3.8/site-packages/pandas/io/common.py
7813 views
"""Common IO api utilities"""1from __future__ import annotations23import bz24import codecs5from collections import abc6import dataclasses7import functools8import gzip9from io import (10BufferedIOBase,11BytesIO,12RawIOBase,13StringIO,14TextIOBase,15TextIOWrapper,16)17import mmap18import os19from pathlib import Path20import re21from typing import (22IO,23Any,24AnyStr,25Generic,26Literal,27Mapping,28TypeVar,29cast,30overload,31)32from urllib.parse import (33urljoin,34urlparse as parse_url,35uses_netloc,36uses_params,37uses_relative,38)39import warnings40import zipfile4142from pandas._typing import (43BaseBuffer,44CompressionDict,45CompressionOptions,46FilePath,47ReadBuffer,48StorageOptions,49WriteBuffer,50)51from pandas.compat import get_lzma_file52from pandas.compat._optional import import_optional_dependency53from pandas.util._decorators import doc54from pandas.util._exceptions import find_stack_level5556from pandas.core.dtypes.common import is_file_like5758from pandas.core.shared_docs import _shared_docs5960_VALID_URLS = set(uses_relative + uses_netloc + uses_params)61_VALID_URLS.discard("")62_RFC_3986_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9+\-+.]*://")6364BaseBufferT = TypeVar("BaseBufferT", bound=BaseBuffer)656667@dataclasses.dataclass68class IOArgs:69"""70Return value of io/common.py:_get_filepath_or_buffer.71"""7273filepath_or_buffer: str | BaseBuffer74encoding: str75mode: str76compression: CompressionDict77should_close: bool = False787980@dataclasses.dataclass81class IOHandles(Generic[AnyStr]):82"""83Return value of io/common.py:get_handle8485Can be used as a context manager.8687This is used to easily close created buffers and to handle corner cases when88TextIOWrapper is inserted.8990handle: The file handle to be used.91created_handles: All file handles that are created by get_handle92is_wrapped: Whether a TextIOWrapper needs to be detached.93"""9495# handle might not implement the IO-interface96handle: IO[AnyStr]97compression: CompressionDict98created_handles: list[IO[bytes] | IO[str]] = dataclasses.field(default_factory=list)99is_wrapped: bool = False100is_mmap: bool = False101102def close(self) -> None:103"""104Close all created buffers.105106Note: If a TextIOWrapper was inserted, it is flushed and detached to107avoid closing the potentially user-created buffer.108"""109if self.is_wrapped:110assert isinstance(self.handle, TextIOWrapper)111self.handle.flush()112self.handle.detach()113self.created_handles.remove(self.handle)114try:115for handle in self.created_handles:116handle.close()117except (OSError, ValueError):118pass119self.created_handles = []120self.is_wrapped = False121122def __enter__(self) -> IOHandles[AnyStr]:123return self124125def __exit__(self, *args: Any) -> None:126self.close()127128129def is_url(url: object) -> bool:130"""131Check to see if a URL has a valid protocol.132133Parameters134----------135url : str or unicode136137Returns138-------139isurl : bool140If `url` has a valid protocol return True otherwise False.141"""142if not isinstance(url, str):143return False144return parse_url(url).scheme in _VALID_URLS145146147@overload148def _expand_user(filepath_or_buffer: str) -> str:149...150151152@overload153def _expand_user(filepath_or_buffer: BaseBufferT) -> BaseBufferT:154...155156157def _expand_user(filepath_or_buffer: str | BaseBufferT) -> str | BaseBufferT:158"""159Return the argument with an initial component of ~ or ~user160replaced by that user's home directory.161162Parameters163----------164filepath_or_buffer : object to be converted if possible165166Returns167-------168expanded_filepath_or_buffer : an expanded filepath or the169input if not expandable170"""171if isinstance(filepath_or_buffer, str):172return os.path.expanduser(filepath_or_buffer)173return filepath_or_buffer174175176def validate_header_arg(header: object) -> None:177if isinstance(header, bool):178raise TypeError(179"Passing a bool to header is invalid. Use header=None for no header or "180"header=int or list-like of ints to specify "181"the row(s) making up the column names"182)183184185@overload186def stringify_path(filepath_or_buffer: FilePath, convert_file_like: bool = ...) -> str:187...188189190@overload191def stringify_path(192filepath_or_buffer: BaseBufferT, convert_file_like: bool = ...193) -> BaseBufferT:194...195196197def stringify_path(198filepath_or_buffer: FilePath | BaseBufferT,199convert_file_like: bool = False,200) -> str | BaseBufferT:201"""202Attempt to convert a path-like object to a string.203204Parameters205----------206filepath_or_buffer : object to be converted207208Returns209-------210str_filepath_or_buffer : maybe a string version of the object211212Notes213-----214Objects supporting the fspath protocol (python 3.6+) are coerced215according to its __fspath__ method.216217Any other object is passed through unchanged, which includes bytes,218strings, buffers, or anything else that's not even path-like.219"""220if not convert_file_like and is_file_like(filepath_or_buffer):221# GH 38125: some fsspec objects implement os.PathLike but have already opened a222# file. This prevents opening the file a second time. infer_compression calls223# this function with convert_file_like=True to infer the compression.224return cast(BaseBufferT, filepath_or_buffer)225226if isinstance(filepath_or_buffer, os.PathLike):227filepath_or_buffer = filepath_or_buffer.__fspath__()228return _expand_user(filepath_or_buffer)229230231def urlopen(*args, **kwargs):232"""233Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of234the stdlib.235"""236import urllib.request237238return urllib.request.urlopen(*args, **kwargs)239240241def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:242"""243Returns true if the given URL looks like244something fsspec can handle245"""246return (247isinstance(url, str)248and bool(_RFC_3986_PATTERN.match(url))249and not url.startswith(("http://", "https://"))250)251252253@doc(compression_options=_shared_docs["compression_options"] % "filepath_or_buffer")254def _get_filepath_or_buffer(255filepath_or_buffer: FilePath | BaseBuffer,256encoding: str = "utf-8",257compression: CompressionOptions = None,258mode: str = "r",259storage_options: StorageOptions = None,260) -> IOArgs:261"""262If the filepath_or_buffer is a url, translate and return the buffer.263Otherwise passthrough.264265Parameters266----------267filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),268or buffer269{compression_options}270271.. versionchanged:: 1.4.0 Zstandard support.272273encoding : the encoding to use to decode bytes, default is 'utf-8'274mode : str, optional275276storage_options : dict, optional277Extra options that make sense for a particular storage connection, e.g.278host, port, username, password, etc., if using a URL that will279be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error280will be raised if providing this argument with a local path or281a file-like buffer. See the fsspec and backend storage implementation282docs for the set of allowed keys and values283284.. versionadded:: 1.2.0285286..versionchange:: 1.2.0287288Returns the dataclass IOArgs.289"""290filepath_or_buffer = stringify_path(filepath_or_buffer)291292# handle compression dict293compression_method, compression = get_compression_method(compression)294compression_method = infer_compression(filepath_or_buffer, compression_method)295296# GH21227 internal compression is not used for non-binary handles.297if compression_method and hasattr(filepath_or_buffer, "write") and "b" not in mode:298warnings.warn(299"compression has no effect when passing a non-binary object as input.",300RuntimeWarning,301stacklevel=find_stack_level(),302)303compression_method = None304305compression = dict(compression, method=compression_method)306307# bz2 and xz do not write the byte order mark for utf-16 and utf-32308# print a warning when writing such files309if (310"w" in mode311and compression_method in ["bz2", "xz"]312and encoding in ["utf-16", "utf-32"]313):314warnings.warn(315f"{compression} will not write the byte order mark for {encoding}",316UnicodeWarning,317)318319# Use binary mode when converting path-like objects to file-like objects (fsspec)320# except when text mode is explicitly requested. The original mode is returned if321# fsspec is not used.322fsspec_mode = mode323if "t" not in fsspec_mode and "b" not in fsspec_mode:324fsspec_mode += "b"325326if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):327# TODO: fsspec can also handle HTTP via requests, but leaving this328# unchanged. using fsspec appears to break the ability to infer if the329# server responded with gzipped data330storage_options = storage_options or {}331332# waiting until now for importing to match intended lazy logic of333# urlopen function defined elsewhere in this module334import urllib.request335336# assuming storage_options is to be interpreted as headers337req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options)338with urlopen(req_info) as req:339content_encoding = req.headers.get("Content-Encoding", None)340if content_encoding == "gzip":341# Override compression based on Content-Encoding header342compression = {"method": "gzip"}343reader = BytesIO(req.read())344return IOArgs(345filepath_or_buffer=reader,346encoding=encoding,347compression=compression,348should_close=True,349mode=fsspec_mode,350)351352if is_fsspec_url(filepath_or_buffer):353assert isinstance(354filepath_or_buffer, str355) # just to appease mypy for this branch356# two special-case s3-like protocols; these have special meaning in Hadoop,357# but are equivalent to just "s3" from fsspec's point of view358# cc #11071359if filepath_or_buffer.startswith("s3a://"):360filepath_or_buffer = filepath_or_buffer.replace("s3a://", "s3://")361if filepath_or_buffer.startswith("s3n://"):362filepath_or_buffer = filepath_or_buffer.replace("s3n://", "s3://")363fsspec = import_optional_dependency("fsspec")364365# If botocore is installed we fallback to reading with anon=True366# to allow reads from public buckets367err_types_to_retry_with_anon: list[Any] = []368try:369import_optional_dependency("botocore")370from botocore.exceptions import (371ClientError,372NoCredentialsError,373)374375err_types_to_retry_with_anon = [376ClientError,377NoCredentialsError,378PermissionError,379]380except ImportError:381pass382383try:384file_obj = fsspec.open(385filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})386).open()387# GH 34626 Reads from Public Buckets without Credentials needs anon=True388except tuple(err_types_to_retry_with_anon):389if storage_options is None:390storage_options = {"anon": True}391else:392# don't mutate user input.393storage_options = dict(storage_options)394storage_options["anon"] = True395file_obj = fsspec.open(396filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})397).open()398399return IOArgs(400filepath_or_buffer=file_obj,401encoding=encoding,402compression=compression,403should_close=True,404mode=fsspec_mode,405)406elif storage_options:407raise ValueError(408"storage_options passed with file object or non-fsspec file path"409)410411if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)):412return IOArgs(413filepath_or_buffer=_expand_user(filepath_or_buffer),414encoding=encoding,415compression=compression,416should_close=False,417mode=mode,418)419420# is_file_like requires (read | write) & __iter__ but __iter__ is only421# needed for read_csv(engine=python)422if not (423hasattr(filepath_or_buffer, "read") or hasattr(filepath_or_buffer, "write")424):425msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}"426raise ValueError(msg)427428return IOArgs(429filepath_or_buffer=filepath_or_buffer,430encoding=encoding,431compression=compression,432should_close=False,433mode=mode,434)435436437def file_path_to_url(path: str) -> str:438"""439converts an absolute native path to a FILE URL.440441Parameters442----------443path : a path in native format444445Returns446-------447a valid FILE URL448"""449# lazify expensive import (~30ms)450from urllib.request import pathname2url451452return urljoin("file:", pathname2url(path))453454455_compression_to_extension = {456"gzip": ".gz",457"bz2": ".bz2",458"zip": ".zip",459"xz": ".xz",460"zstd": ".zst",461}462463464def get_compression_method(465compression: CompressionOptions,466) -> tuple[str | None, CompressionDict]:467"""468Simplifies a compression argument to a compression method string and469a mapping containing additional arguments.470471Parameters472----------473compression : str or mapping474If string, specifies the compression method. If mapping, value at key475'method' specifies compression method.476477Returns478-------479tuple of ({compression method}, Optional[str]480{compression arguments}, Dict[str, Any])481482Raises483------484ValueError on mapping missing 'method' key485"""486compression_method: str | None487if isinstance(compression, Mapping):488compression_args = dict(compression)489try:490compression_method = compression_args.pop("method")491except KeyError as err:492raise ValueError("If mapping, compression must have key 'method'") from err493else:494compression_args = {}495compression_method = compression496return compression_method, compression_args497498499@doc(compression_options=_shared_docs["compression_options"] % "filepath_or_buffer")500def infer_compression(501filepath_or_buffer: FilePath | BaseBuffer, compression: str | None502) -> str | None:503"""504Get the compression method for filepath_or_buffer. If compression='infer',505the inferred compression method is returned. Otherwise, the input506compression method is returned unchanged, unless it's invalid, in which507case an error is raised.508509Parameters510----------511filepath_or_buffer : str or file handle512File path or object.513{compression_options}514515.. versionchanged:: 1.4.0 Zstandard support.516517Returns518-------519string or None520521Raises522------523ValueError on invalid compression specified.524"""525if compression is None:526return None527528# Infer compression529if compression == "infer":530# Convert all path types (e.g. pathlib.Path) to strings531filepath_or_buffer = stringify_path(filepath_or_buffer, convert_file_like=True)532if not isinstance(filepath_or_buffer, str):533# Cannot infer compression of a buffer, assume no compression534return None535536# Infer compression from the filename/URL extension537for compression, extension in _compression_to_extension.items():538if filepath_or_buffer.lower().endswith(extension):539return compression540return None541542# Compression has been specified. Check that it's valid543if compression in _compression_to_extension:544return compression545546# https://github.com/python/mypy/issues/5492547# Unsupported operand types for + ("List[Optional[str]]" and "List[str]")548valid = ["infer", None] + sorted(549_compression_to_extension550) # type: ignore[operator]551msg = (552f"Unrecognized compression type: {compression}\n"553f"Valid compression types are {valid}"554)555raise ValueError(msg)556557558def check_parent_directory(path: Path | str) -> None:559"""560Check if parent directory of a file exists, raise OSError if it does not561562Parameters563----------564path: Path or str565Path to check parent directory of566567"""568parent = Path(path).parent569if not parent.is_dir():570raise OSError(rf"Cannot save file into a non-existent directory: '{parent}'")571572573@overload574def get_handle(575path_or_buf: FilePath | BaseBuffer,576mode: str,577*,578encoding: str | None = ...,579compression: CompressionOptions = ...,580memory_map: bool = ...,581is_text: Literal[False],582errors: str | None = ...,583storage_options: StorageOptions = ...,584) -> IOHandles[bytes]:585...586587588@overload589def get_handle(590path_or_buf: FilePath | BaseBuffer,591mode: str,592*,593encoding: str | None = ...,594compression: CompressionOptions = ...,595memory_map: bool = ...,596is_text: Literal[True] = ...,597errors: str | None = ...,598storage_options: StorageOptions = ...,599) -> IOHandles[str]:600...601602603@doc(compression_options=_shared_docs["compression_options"] % "path_or_buf")604def get_handle(605path_or_buf: FilePath | BaseBuffer,606mode: str,607*,608encoding: str | None = None,609compression: CompressionOptions = None,610memory_map: bool = False,611is_text: bool = True,612errors: str | None = None,613storage_options: StorageOptions = None,614) -> IOHandles[str] | IOHandles[bytes]:615"""616Get file handle for given path/buffer and mode.617618Parameters619----------620path_or_buf : str or file handle621File path or object.622mode : str623Mode to open path_or_buf with.624encoding : str or None625Encoding to use.626{compression_options}627628.. versionchanged:: 1.0.0629May now be a dict with key 'method' as compression mode630and other keys as compression options if compression631mode is 'zip'.632633.. versionchanged:: 1.1.0634Passing compression options as keys in dict is now635supported for compression modes 'gzip', 'bz2', 'zstd' and 'zip'.636637.. versionchanged:: 1.4.0 Zstandard support.638639memory_map : bool, default False640See parsers._parser_params for more information.641is_text : bool, default True642Whether the type of the content passed to the file/buffer is string or643bytes. This is not the same as `"b" not in mode`. If a string content is644passed to a binary file/buffer, a wrapper is inserted.645errors : str, default 'strict'646Specifies how encoding and decoding errors are to be handled.647See the errors argument for :func:`open` for a full list648of options.649storage_options: StorageOptions = None650Passed to _get_filepath_or_buffer651652.. versionchanged:: 1.2.0653654Returns the dataclass IOHandles655"""656# Windows does not default to utf-8. Set to utf-8 for a consistent behavior657encoding = encoding or "utf-8"658659# read_csv does not know whether the buffer is opened in binary/text mode660if _is_binary_mode(path_or_buf, mode) and "b" not in mode:661mode += "b"662663# validate encoding and errors664codecs.lookup(encoding)665if isinstance(errors, str):666codecs.lookup_error(errors)667668# open URLs669ioargs = _get_filepath_or_buffer(670path_or_buf,671encoding=encoding,672compression=compression,673mode=mode,674storage_options=storage_options,675)676677handle = ioargs.filepath_or_buffer678handles: list[BaseBuffer]679680# memory mapping needs to be the first step681handle, memory_map, handles = _maybe_memory_map(682handle,683memory_map,684ioargs.encoding,685ioargs.mode,686errors,687ioargs.compression["method"] not in _compression_to_extension,688)689690is_path = isinstance(handle, str)691compression_args = dict(ioargs.compression)692compression = compression_args.pop("method")693694# Only for write methods695if "r" not in mode and is_path:696check_parent_directory(str(handle))697698if compression:699if compression != "zstd":700# compression libraries do not like an explicit text-mode701ioargs.mode = ioargs.mode.replace("t", "")702elif compression == "zstd" and "b" not in ioargs.mode:703# python-zstandard defaults to text mode, but we always expect704# compression libraries to use binary mode.705ioargs.mode += "b"706707# GZ Compression708if compression == "gzip":709if is_path:710assert isinstance(handle, str)711# error: Incompatible types in assignment (expression has type712# "GzipFile", variable has type "Union[str, BaseBuffer]")713handle = gzip.GzipFile( # type: ignore[assignment]714filename=handle,715mode=ioargs.mode,716**compression_args,717)718else:719handle = gzip.GzipFile(720# No overload variant of "GzipFile" matches argument types721# "Union[str, BaseBuffer]", "str", "Dict[str, Any]"722fileobj=handle, # type: ignore[call-overload]723mode=ioargs.mode,724**compression_args,725)726727# BZ Compression728elif compression == "bz2":729# No overload variant of "BZ2File" matches argument types730# "Union[str, BaseBuffer]", "str", "Dict[str, Any]"731handle = bz2.BZ2File( # type: ignore[call-overload]732handle,733mode=ioargs.mode,734**compression_args,735)736737# ZIP Compression738elif compression == "zip":739# error: Argument 1 to "_BytesZipFile" has incompatible type "Union[str,740# BaseBuffer]"; expected "Union[Union[str, PathLike[str]],741# ReadBuffer[bytes], WriteBuffer[bytes]]"742handle = _BytesZipFile(743handle, ioargs.mode, **compression_args # type: ignore[arg-type]744)745if handle.mode == "r":746handles.append(handle)747zip_names = handle.namelist()748if len(zip_names) == 1:749handle = handle.open(zip_names.pop())750elif len(zip_names) == 0:751raise ValueError(f"Zero files found in ZIP file {path_or_buf}")752else:753raise ValueError(754"Multiple files found in ZIP file. "755f"Only one file per ZIP: {zip_names}"756)757758# XZ Compression759elif compression == "xz":760handle = get_lzma_file()(handle, ioargs.mode)761762# Zstd Compression763elif compression == "zstd":764zstd = import_optional_dependency("zstandard")765if "r" in ioargs.mode:766open_args = {"dctx": zstd.ZstdDecompressor(**compression_args)}767else:768open_args = {"cctx": zstd.ZstdCompressor(**compression_args)}769handle = zstd.open(770handle,771mode=ioargs.mode,772**open_args,773)774775# Unrecognized Compression776else:777msg = f"Unrecognized compression type: {compression}"778raise ValueError(msg)779780assert not isinstance(handle, str)781handles.append(handle)782783elif isinstance(handle, str):784# Check whether the filename is to be opened in binary mode.785# Binary mode does not support 'encoding' and 'newline'.786if ioargs.encoding and "b" not in ioargs.mode:787# Encoding788handle = open(789handle,790ioargs.mode,791encoding=ioargs.encoding,792errors=errors,793newline="",794)795else:796# Binary mode797handle = open(handle, ioargs.mode)798handles.append(handle)799800# Convert BytesIO or file objects passed with an encoding801is_wrapped = False802if not is_text and ioargs.mode == "rb" and isinstance(handle, TextIOBase):803# not added to handles as it does not open/buffer resources804handle = _BytesIOWrapper(805handle,806encoding=ioargs.encoding,807)808elif is_text and (compression or _is_binary_mode(handle, ioargs.mode)):809handle = TextIOWrapper(810# error: Argument 1 to "TextIOWrapper" has incompatible type811# "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]";812# expected "IO[bytes]"813_IOWrapper(handle), # type: ignore[arg-type]814encoding=ioargs.encoding,815errors=errors,816newline="",817)818handles.append(handle)819# only marked as wrapped when the caller provided a handle820is_wrapped = not (821isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close822)823824if "r" in ioargs.mode and not hasattr(handle, "read"):825raise TypeError(826"Expected file path name or file-like object, "827f"got {type(ioargs.filepath_or_buffer)} type"828)829830handles.reverse() # close the most recently added buffer first831if ioargs.should_close:832assert not isinstance(ioargs.filepath_or_buffer, str)833handles.append(ioargs.filepath_or_buffer)834835return IOHandles(836# error: Argument "handle" to "IOHandles" has incompatible type837# "Union[TextIOWrapper, GzipFile, BaseBuffer, typing.IO[bytes],838# typing.IO[Any]]"; expected "pandas._typing.IO[Any]"839handle=handle, # type: ignore[arg-type]840# error: Argument "created_handles" to "IOHandles" has incompatible type841# "List[BaseBuffer]"; expected "List[Union[IO[bytes], IO[str]]]"842created_handles=handles, # type: ignore[arg-type]843is_wrapped=is_wrapped,844is_mmap=memory_map,845compression=ioargs.compression,846)847848849# error: Definition of "__exit__" in base class "ZipFile" is incompatible with850# definition in base class "BytesIO" [misc]851# error: Definition of "__enter__" in base class "ZipFile" is incompatible with852# definition in base class "BytesIO" [misc]853# error: Definition of "__enter__" in base class "ZipFile" is incompatible with854# definition in base class "BinaryIO" [misc]855# error: Definition of "__enter__" in base class "ZipFile" is incompatible with856# definition in base class "IO" [misc]857# error: Definition of "read" in base class "ZipFile" is incompatible with858# definition in base class "BytesIO" [misc]859# error: Definition of "read" in base class "ZipFile" is incompatible with860# definition in base class "IO" [misc]861class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore[misc]862"""863Wrapper for standard library class ZipFile and allow the returned file-like864handle to accept byte strings via `write` method.865866BytesIO provides attributes of file-like object and ZipFile.writestr writes867bytes strings into a member of the archive.868"""869870# GH 17778871def __init__(872self,873file: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes],874mode: str,875archive_name: str | None = None,876**kwargs,877):878mode = mode.replace("b", "")879self.archive_name = archive_name880self.multiple_write_buffer: StringIO | BytesIO | None = None881882kwargs_zip: dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED}883kwargs_zip.update(kwargs)884885# error: Argument 1 to "__init__" of "ZipFile" has incompatible type886# "Union[_PathLike[str], Union[str, Union[IO[Any], RawIOBase, BufferedIOBase,887# TextIOBase, TextIOWrapper, mmap]]]"; expected "Union[Union[str,888# _PathLike[str]], IO[bytes]]"889super().__init__(file, mode, **kwargs_zip) # type: ignore[arg-type]890891def infer_filename(self):892"""893If an explicit archive_name is not given, we still want the file inside the zip894file not to be named something.zip, because that causes confusion (GH39465).895"""896if isinstance(self.filename, (os.PathLike, str)):897filename = Path(self.filename)898if filename.suffix == ".zip":899return filename.with_suffix("").name900return filename.name901return None902903def write(self, data):904# buffer multiple write calls, write on flush905if self.multiple_write_buffer is None:906self.multiple_write_buffer = (907BytesIO() if isinstance(data, bytes) else StringIO()908)909self.multiple_write_buffer.write(data)910911def flush(self) -> None:912# write to actual handle and close write buffer913if self.multiple_write_buffer is None or self.multiple_write_buffer.closed:914return915916# ZipFile needs a non-empty string917archive_name = self.archive_name or self.infer_filename() or "zip"918with self.multiple_write_buffer:919super().writestr(archive_name, self.multiple_write_buffer.getvalue())920921def close(self):922self.flush()923super().close()924925@property926def closed(self):927return self.fp is None928929930class _MMapWrapper(abc.Iterator):931"""932Wrapper for the Python's mmap class so that it can be properly read in933by Python's csv.reader class.934935Parameters936----------937f : file object938File object to be mapped onto memory. Must support the 'fileno'939method or have an equivalent attribute940941"""942943def __init__(944self,945f: IO,946encoding: str = "utf-8",947errors: str = "strict",948decode: bool = True,949):950self.encoding = encoding951self.errors = errors952self.decoder = codecs.getincrementaldecoder(encoding)(errors=errors)953self.decode = decode954955self.attributes = {}956for attribute in ("seekable", "readable"):957if not hasattr(f, attribute):958continue959self.attributes[attribute] = getattr(f, attribute)()960self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)961962def __getattr__(self, name: str):963if name in self.attributes:964return lambda: self.attributes[name]965return getattr(self.mmap, name)966967def __iter__(self) -> _MMapWrapper:968return self969970def read(self, size: int = -1) -> str | bytes:971# CSV c-engine uses read instead of iterating972content: bytes = self.mmap.read(size)973if self.decode and self.encoding != "utf-8":974# memory mapping is applied before compression. Encoding should975# be applied to the de-compressed data.976final = size == -1 or len(content) < size977return self.decoder.decode(content, final=final)978return content979980def __next__(self) -> str:981newbytes = self.mmap.readline()982983# readline returns bytes, not str, but Python's CSV reader984# expects str, so convert the output to str before continuing985newline = self.decoder.decode(newbytes)986987# mmap doesn't raise if reading past the allocated988# data but instead returns an empty string, so raise989# if that is returned990if newline == "":991raise StopIteration992993# IncrementalDecoder seems to push newline to the next line994return newline.lstrip("\n")995996997class _IOWrapper:998# TextIOWrapper is overly strict: it request that the buffer has seekable, readable,999# and writable. If we have a read-only buffer, we shouldn't need writable and vice1000# versa. Some buffers, are seek/read/writ-able but they do not have the "-able"1001# methods, e.g., tempfile.SpooledTemporaryFile.1002# If a buffer does not have the above "-able" methods, we simple assume they are1003# seek/read/writ-able.1004def __init__(self, buffer: BaseBuffer):1005self.buffer = buffer10061007def __getattr__(self, name: str):1008return getattr(self.buffer, name)10091010def readable(self) -> bool:1011if hasattr(self.buffer, "readable"):1012# error: "BaseBuffer" has no attribute "readable"1013return self.buffer.readable() # type: ignore[attr-defined]1014return True10151016def seekable(self) -> bool:1017if hasattr(self.buffer, "seekable"):1018return self.buffer.seekable()1019return True10201021def writable(self) -> bool:1022if hasattr(self.buffer, "writable"):1023# error: "BaseBuffer" has no attribute "writable"1024return self.buffer.writable() # type: ignore[attr-defined]1025return True102610271028class _BytesIOWrapper:1029# Wrapper that wraps a StringIO buffer and reads bytes from it1030# Created for compat with pyarrow read_csv1031def __init__(self, buffer: StringIO | TextIOBase, encoding: str = "utf-8"):1032self.buffer = buffer1033self.encoding = encoding1034# Because a character can be represented by more than 1 byte,1035# it is possible that reading will produce more bytes than n1036# We store the extra bytes in this overflow variable, and append the1037# overflow to the front of the bytestring the next time reading is performed1038self.overflow = b""10391040def __getattr__(self, attr: str):1041return getattr(self.buffer, attr)10421043def read(self, n: int | None = -1) -> bytes:1044assert self.buffer is not None1045bytestring = self.buffer.read(n).encode(self.encoding)1046# When n=-1/n greater than remaining bytes: Read entire file/rest of file1047combined_bytestring = self.overflow + bytestring1048if n is None or n < 0 or n >= len(combined_bytestring):1049self.overflow = b""1050return combined_bytestring1051else:1052to_return = combined_bytestring[:n]1053self.overflow = combined_bytestring[n:]1054return to_return105510561057def _maybe_memory_map(1058handle: str | BaseBuffer,1059memory_map: bool,1060encoding: str,1061mode: str,1062errors: str | None,1063decode: bool,1064) -> tuple[str | BaseBuffer, bool, list[BaseBuffer]]:1065"""Try to memory map file/buffer."""1066handles: list[BaseBuffer] = []1067memory_map &= hasattr(handle, "fileno") or isinstance(handle, str)1068if not memory_map:1069return handle, memory_map, handles10701071# need to open the file first1072if isinstance(handle, str):1073if encoding and "b" not in mode:1074# Encoding1075handle = open(handle, mode, encoding=encoding, errors=errors, newline="")1076else:1077# Binary mode1078handle = open(handle, mode)1079handles.append(handle)10801081# error: Argument 1 to "_MMapWrapper" has incompatible type "Union[IO[Any],1082# RawIOBase, BufferedIOBase, TextIOBase, mmap]"; expected "IO[Any]"1083try:1084wrapped = cast(1085BaseBuffer,1086_MMapWrapper(handle, encoding, errors, decode), # type: ignore[arg-type]1087)1088finally:1089for handle in reversed(handles):1090# error: "BaseBuffer" has no attribute "close"1091handle.close() # type: ignore[attr-defined]1092handles.append(wrapped)10931094return wrapped, memory_map, handles109510961097def file_exists(filepath_or_buffer: FilePath | BaseBuffer) -> bool:1098"""Test whether file exists."""1099exists = False1100filepath_or_buffer = stringify_path(filepath_or_buffer)1101if not isinstance(filepath_or_buffer, str):1102return exists1103try:1104exists = os.path.exists(filepath_or_buffer)1105# gh-5874: if the filepath is too long will raise here1106except (TypeError, ValueError):1107pass1108return exists110911101111def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool:1112"""Whether the handle is opened in binary mode"""1113# specified by user1114if "t" in mode or "b" in mode:1115return "b" in mode11161117# exceptions1118text_classes = (1119# classes that expect string but have 'b' in mode1120codecs.StreamWriter,1121codecs.StreamReader,1122codecs.StreamReaderWriter,1123)1124if issubclass(type(handle), text_classes):1125return False11261127return isinstance(handle, _get_binary_io_classes()) or "b" in getattr(1128handle, "mode", mode1129)113011311132@functools.lru_cache1133def _get_binary_io_classes() -> tuple[type, ...]:1134"""IO classes that that expect bytes"""1135binary_classes: tuple[type, ...] = (BufferedIOBase, RawIOBase)11361137# python-zstandard doesn't use any of the builtin base classes; instead we1138# have to use the `zstd.ZstdDecompressionReader` class for isinstance checks.1139# Unfortunately `zstd.ZstdDecompressionReader` isn't exposed by python-zstandard1140# so we have to get it from a `zstd.ZstdDecompressor` instance.1141# See also https://github.com/indygreg/python-zstandard/pull/165.1142zstd = import_optional_dependency("zstandard", errors="ignore")1143if zstd is not None:1144with zstd.ZstdDecompressor().stream_reader(b"") as reader:1145binary_classes += (type(reader),)11461147return binary_classes114811491150