Path: blob/master/ invest-robot-contest_TinkoffBotTwitch-main/venv/lib/python3.8/site-packages/pandas/io/pytables.py
7826 views
"""1High level interface to PyTables for reading and writing pandas data structures2to disk3"""4from __future__ import annotations56from contextlib import suppress7import copy8from datetime import (9date,10tzinfo,11)12import itertools13import os14import re15from textwrap import dedent16from typing import (17TYPE_CHECKING,18Any,19Callable,20Hashable,21Literal,22Sequence,23cast,24)25import warnings2627import numpy as np2829from pandas._config import (30config,31get_option,32)3334from pandas._libs import (35lib,36writers as libwriters,37)38from pandas._libs.tslibs import timezones39from pandas._typing import (40ArrayLike,41DtypeArg,42Shape,43)44from pandas.compat._optional import import_optional_dependency45from pandas.compat.pickle_compat import patch_pickle46from pandas.errors import PerformanceWarning47from pandas.util._decorators import cache_readonly48from pandas.util._exceptions import find_stack_level4950from pandas.core.dtypes.common import (51ensure_object,52is_categorical_dtype,53is_complex_dtype,54is_datetime64_dtype,55is_datetime64tz_dtype,56is_extension_array_dtype,57is_list_like,58is_string_dtype,59is_timedelta64_dtype,60needs_i8_conversion,61)62from pandas.core.dtypes.missing import array_equivalent6364from pandas import (65DataFrame,66DatetimeIndex,67Index,68MultiIndex,69PeriodIndex,70Series,71TimedeltaIndex,72concat,73isna,74)75from pandas.core.api import Int64Index76from pandas.core.arrays import (77Categorical,78DatetimeArray,79PeriodArray,80)81import pandas.core.common as com82from pandas.core.computation.pytables import (83PyTablesExpr,84maybe_expression,85)86from pandas.core.construction import extract_array87from pandas.core.indexes.api import ensure_index88from pandas.core.internals import (89ArrayManager,90BlockManager,91)9293from pandas.io.common import stringify_path94from pandas.io.formats.printing import (95adjoin,96pprint_thing,97)9899if TYPE_CHECKING:100from tables import (101Col,102File,103Node,104)105106from pandas.core.internals import Block107108109# versioning attribute110_version = "0.15.2"111112# encoding113_default_encoding = "UTF-8"114115116def _ensure_decoded(s):117"""if we have bytes, decode them to unicode"""118if isinstance(s, np.bytes_):119s = s.decode("UTF-8")120return s121122123def _ensure_encoding(encoding):124# set the encoding if we need125if encoding is None:126encoding = _default_encoding127128return encoding129130131def _ensure_str(name):132"""133Ensure that an index / column name is a str (python 3); otherwise they134may be np.string dtype. Non-string dtypes are passed through unchanged.135136https://github.com/pandas-dev/pandas/issues/13492137"""138if isinstance(name, str):139name = str(name)140return name141142143Term = PyTablesExpr144145146def _ensure_term(where, scope_level: int):147"""148Ensure that the where is a Term or a list of Term.149150This makes sure that we are capturing the scope of variables that are151passed create the terms here with a frame_level=2 (we are 2 levels down)152"""153# only consider list/tuple here as an ndarray is automatically a coordinate154# list155level = scope_level + 1156if isinstance(where, (list, tuple)):157where = [158Term(term, scope_level=level + 1) if maybe_expression(term) else term159for term in where160if term is not None161]162elif maybe_expression(where):163where = Term(where, scope_level=level)164return where if where is None or len(where) else None165166167class PossibleDataLossError(Exception):168pass169170171class ClosedFileError(Exception):172pass173174175class IncompatibilityWarning(Warning):176pass177178179incompatibility_doc = """180where criteria is being ignored as this version [%s] is too old (or181not-defined), read the file in and write it out to a new file to upgrade (with182the copy_to method)183"""184185186class AttributeConflictWarning(Warning):187pass188189190attribute_conflict_doc = """191the [%s] attribute of the existing index is [%s] which conflicts with the new192[%s], resetting the attribute to None193"""194195196class DuplicateWarning(Warning):197pass198199200duplicate_doc = """201duplicate entries in table, taking most recently appended202"""203204performance_doc = """205your performance may suffer as PyTables will pickle object types that it cannot206map directly to c-types [inferred_type->%s,key->%s] [items->%s]207"""208209# formats210_FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"}211212# axes map213_AXES_MAP = {DataFrame: [0]}214215# register our configuration options216dropna_doc = """217: boolean218drop ALL nan rows when appending to a table219"""220format_doc = """221: format222default format writing format, if None, then223put will default to 'fixed' and append will default to 'table'224"""225226with config.config_prefix("io.hdf"):227config.register_option("dropna_table", False, dropna_doc, validator=config.is_bool)228config.register_option(229"default_format",230None,231format_doc,232validator=config.is_one_of_factory(["fixed", "table", None]),233)234235# oh the troubles to reduce import time236_table_mod = None237_table_file_open_policy_is_strict = False238239240def _tables():241global _table_mod242global _table_file_open_policy_is_strict243if _table_mod is None:244import tables245246_table_mod = tables247248# set the file open policy249# return the file open policy; this changes as of pytables 3.1250# depending on the HDF5 version251with suppress(AttributeError):252_table_file_open_policy_is_strict = (253tables.file._FILE_OPEN_POLICY == "strict"254)255256return _table_mod257258259# interface to/from ###260261262def to_hdf(263path_or_buf,264key: str,265value: DataFrame | Series,266mode: str = "a",267complevel: int | None = None,268complib: str | None = None,269append: bool = False,270format: str | None = None,271index: bool = True,272min_itemsize: int | dict[str, int] | None = None,273nan_rep=None,274dropna: bool | None = None,275data_columns: Literal[True] | list[str] | None = None,276errors: str = "strict",277encoding: str = "UTF-8",278) -> None:279"""store this object, close it if we opened it"""280if append:281f = lambda store: store.append(282key,283value,284format=format,285index=index,286min_itemsize=min_itemsize,287nan_rep=nan_rep,288dropna=dropna,289data_columns=data_columns,290errors=errors,291encoding=encoding,292)293else:294# NB: dropna is not passed to `put`295f = lambda store: store.put(296key,297value,298format=format,299index=index,300min_itemsize=min_itemsize,301nan_rep=nan_rep,302data_columns=data_columns,303errors=errors,304encoding=encoding,305dropna=dropna,306)307308path_or_buf = stringify_path(path_or_buf)309if isinstance(path_or_buf, str):310with HDFStore(311path_or_buf, mode=mode, complevel=complevel, complib=complib312) as store:313f(store)314else:315f(path_or_buf)316317318def read_hdf(319path_or_buf,320key=None,321mode: str = "r",322errors: str = "strict",323where=None,324start: int | None = None,325stop: int | None = None,326columns=None,327iterator=False,328chunksize: int | None = None,329**kwargs,330):331"""332Read from the store, close it if we opened it.333334Retrieve pandas object stored in file, optionally based on where335criteria.336337.. warning::338339Pandas uses PyTables for reading and writing HDF5 files, which allows340serializing object-dtype data with pickle when using the "fixed" format.341Loading pickled data received from untrusted sources can be unsafe.342343See: https://docs.python.org/3/library/pickle.html for more.344345Parameters346----------347path_or_buf : str, path object, pandas.HDFStore348Any valid string path is acceptable. Only supports the local file system,349remote URLs and file-like objects are not supported.350351If you want to pass in a path object, pandas accepts any352``os.PathLike``.353354Alternatively, pandas accepts an open :class:`pandas.HDFStore` object.355356key : object, optional357The group identifier in the store. Can be omitted if the HDF file358contains a single pandas object.359mode : {'r', 'r+', 'a'}, default 'r'360Mode to use when opening the file. Ignored if path_or_buf is a361:class:`pandas.HDFStore`. Default is 'r'.362errors : str, default 'strict'363Specifies how encoding and decoding errors are to be handled.364See the errors argument for :func:`open` for a full list365of options.366where : list, optional367A list of Term (or convertible) objects.368start : int, optional369Row number to start selection.370stop : int, optional371Row number to stop selection.372columns : list, optional373A list of columns names to return.374iterator : bool, optional375Return an iterator object.376chunksize : int, optional377Number of rows to include in an iteration when using an iterator.378**kwargs379Additional keyword arguments passed to HDFStore.380381Returns382-------383item : object384The selected object. Return type depends on the object stored.385386See Also387--------388DataFrame.to_hdf : Write a HDF file from a DataFrame.389HDFStore : Low-level access to HDF files.390391Examples392--------393>>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP394>>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP395>>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP396"""397if mode not in ["r", "r+", "a"]:398raise ValueError(399f"mode {mode} is not allowed while performing a read. "400f"Allowed modes are r, r+ and a."401)402# grab the scope403if where is not None:404where = _ensure_term(where, scope_level=1)405406if isinstance(path_or_buf, HDFStore):407if not path_or_buf.is_open:408raise OSError("The HDFStore must be open for reading.")409410store = path_or_buf411auto_close = False412else:413path_or_buf = stringify_path(path_or_buf)414if not isinstance(path_or_buf, str):415raise NotImplementedError(416"Support for generic buffers has not been implemented."417)418try:419exists = os.path.exists(path_or_buf)420421# if filepath is too long422except (TypeError, ValueError):423exists = False424425if not exists:426raise FileNotFoundError(f"File {path_or_buf} does not exist")427428store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)429# can't auto open/close if we are using an iterator430# so delegate to the iterator431auto_close = True432433try:434if key is None:435groups = store.groups()436if len(groups) == 0:437raise ValueError(438"Dataset(s) incompatible with Pandas data types, "439"not table, or no datasets found in HDF5 file."440)441candidate_only_group = groups[0]442443# For the HDF file to have only one dataset, all other groups444# should then be metadata groups for that candidate group. (This445# assumes that the groups() method enumerates parent groups446# before their children.)447for group_to_check in groups[1:]:448if not _is_metadata_of(group_to_check, candidate_only_group):449raise ValueError(450"key must be provided when HDF5 "451"file contains multiple datasets."452)453key = candidate_only_group._v_pathname454return store.select(455key,456where=where,457start=start,458stop=stop,459columns=columns,460iterator=iterator,461chunksize=chunksize,462auto_close=auto_close,463)464except (ValueError, TypeError, KeyError):465if not isinstance(path_or_buf, HDFStore):466# if there is an error, close the store if we opened it.467with suppress(AttributeError):468store.close()469470raise471472473def _is_metadata_of(group: Node, parent_group: Node) -> bool:474"""Check if a given group is a metadata group for a given parent_group."""475if group._v_depth <= parent_group._v_depth:476return False477478current = group479while current._v_depth > 1:480parent = current._v_parent481if parent == parent_group and current._v_name == "meta":482return True483current = current._v_parent484return False485486487class HDFStore:488"""489Dict-like IO interface for storing pandas objects in PyTables.490491Either Fixed or Table format.492493.. warning::494495Pandas uses PyTables for reading and writing HDF5 files, which allows496serializing object-dtype data with pickle when using the "fixed" format.497Loading pickled data received from untrusted sources can be unsafe.498499See: https://docs.python.org/3/library/pickle.html for more.500501Parameters502----------503path : str504File path to HDF5 file.505mode : {'a', 'w', 'r', 'r+'}, default 'a'506507``'r'``508Read-only; no data can be modified.509``'w'``510Write; a new file is created (an existing file with the same511name would be deleted).512``'a'``513Append; an existing file is opened for reading and writing,514and if the file does not exist it is created.515``'r+'``516It is similar to ``'a'``, but the file must already exist.517complevel : int, 0-9, default None518Specifies a compression level for data.519A value of 0 or None disables compression.520complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'521Specifies the compression library to be used.522As of v0.20.2 these additional compressors for Blosc are supported523(default if no compressor specified: 'blosc:blosclz'):524{'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',525'blosc:zlib', 'blosc:zstd'}.526Specifying a compression library which is not available issues527a ValueError.528fletcher32 : bool, default False529If applying compression use the fletcher32 checksum.530**kwargs531These parameters will be passed to the PyTables open_file method.532533Examples534--------535>>> bar = pd.DataFrame(np.random.randn(10, 4))536>>> store = pd.HDFStore('test.h5')537>>> store['foo'] = bar # write to HDF5538>>> bar = store['foo'] # retrieve539>>> store.close()540541**Create or load HDF5 file in-memory**542543When passing the `driver` option to the PyTables open_file method through544**kwargs, the HDF5 file is loaded or created in-memory and will only be545written when closed:546547>>> bar = pd.DataFrame(np.random.randn(10, 4))548>>> store = pd.HDFStore('test.h5', driver='H5FD_CORE')549>>> store['foo'] = bar550>>> store.close() # only now, data is written to disk551"""552553_handle: File | None554_mode: str555_complevel: int556_fletcher32: bool557558def __init__(559self,560path,561mode: str = "a",562complevel: int | None = None,563complib=None,564fletcher32: bool = False,565**kwargs,566):567568if "format" in kwargs:569raise ValueError("format is not a defined argument for HDFStore")570571tables = import_optional_dependency("tables")572573if complib is not None and complib not in tables.filters.all_complibs:574raise ValueError(575f"complib only supports {tables.filters.all_complibs} compression."576)577578if complib is None and complevel is not None:579complib = tables.filters.default_complib580581self._path = stringify_path(path)582if mode is None:583mode = "a"584self._mode = mode585self._handle = None586self._complevel = complevel if complevel else 0587self._complib = complib588self._fletcher32 = fletcher32589self._filters = None590self.open(mode=mode, **kwargs)591592def __fspath__(self):593return self._path594595@property596def root(self):597"""return the root node"""598self._check_if_open()599assert self._handle is not None # for mypy600return self._handle.root601602@property603def filename(self):604return self._path605606def __getitem__(self, key: str):607return self.get(key)608609def __setitem__(self, key: str, value):610self.put(key, value)611612def __delitem__(self, key: str):613return self.remove(key)614615def __getattr__(self, name: str):616"""allow attribute access to get stores"""617try:618return self.get(name)619except (KeyError, ClosedFileError):620pass621raise AttributeError(622f"'{type(self).__name__}' object has no attribute '{name}'"623)624625def __contains__(self, key: str) -> bool:626"""627check for existence of this key628can match the exact pathname or the pathnm w/o the leading '/'629"""630node = self.get_node(key)631if node is not None:632name = node._v_pathname633if name == key or name[1:] == key:634return True635return False636637def __len__(self) -> int:638return len(self.groups())639640def __repr__(self) -> str:641pstr = pprint_thing(self._path)642return f"{type(self)}\nFile path: {pstr}\n"643644def __enter__(self):645return self646647def __exit__(self, exc_type, exc_value, traceback):648self.close()649650def keys(self, include: str = "pandas") -> list[str]:651"""652Return a list of keys corresponding to objects stored in HDFStore.653654Parameters655----------656657include : str, default 'pandas'658When kind equals 'pandas' return pandas objects.659When kind equals 'native' return native HDF5 Table objects.660661.. versionadded:: 1.1.0662663Returns664-------665list666List of ABSOLUTE path-names (e.g. have the leading '/').667668Raises669------670raises ValueError if kind has an illegal value671"""672if include == "pandas":673return [n._v_pathname for n in self.groups()]674675elif include == "native":676assert self._handle is not None # mypy677return [678n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")679]680raise ValueError(681f"`include` should be either 'pandas' or 'native' but is '{include}'"682)683684def __iter__(self):685return iter(self.keys())686687def items(self):688"""689iterate on key->group690"""691for g in self.groups():692yield g._v_pathname, g693694iteritems = items695696def open(self, mode: str = "a", **kwargs):697"""698Open the file in the specified mode699700Parameters701----------702mode : {'a', 'w', 'r', 'r+'}, default 'a'703See HDFStore docstring or tables.open_file for info about modes704**kwargs705These parameters will be passed to the PyTables open_file method.706"""707tables = _tables()708709if self._mode != mode:710# if we are changing a write mode to read, ok711if self._mode in ["a", "w"] and mode in ["r", "r+"]:712pass713elif mode in ["w"]:714# this would truncate, raise here715if self.is_open:716raise PossibleDataLossError(717f"Re-opening the file [{self._path}] with mode [{self._mode}] "718"will delete the current file!"719)720721self._mode = mode722723# close and reopen the handle724if self.is_open:725self.close()726727if self._complevel and self._complevel > 0:728self._filters = _tables().Filters(729self._complevel, self._complib, fletcher32=self._fletcher32730)731732if _table_file_open_policy_is_strict and self.is_open:733msg = (734"Cannot open HDF5 file, which is already opened, "735"even in read-only mode."736)737raise ValueError(msg)738739self._handle = tables.open_file(self._path, self._mode, **kwargs)740741def close(self):742"""743Close the PyTables file handle744"""745if self._handle is not None:746self._handle.close()747self._handle = None748749@property750def is_open(self) -> bool:751"""752return a boolean indicating whether the file is open753"""754if self._handle is None:755return False756return bool(self._handle.isopen)757758def flush(self, fsync: bool = False):759"""760Force all buffered modifications to be written to disk.761762Parameters763----------764fsync : bool (default False)765call ``os.fsync()`` on the file handle to force writing to disk.766767Notes768-----769Without ``fsync=True``, flushing may not guarantee that the OS writes770to disk. With fsync, the operation will block until the OS claims the771file has been written; however, other caching layers may still772interfere.773"""774if self._handle is not None:775self._handle.flush()776if fsync:777with suppress(OSError):778os.fsync(self._handle.fileno())779780def get(self, key: str):781"""782Retrieve pandas object stored in file.783784Parameters785----------786key : str787788Returns789-------790object791Same type as object stored in file.792"""793with patch_pickle():794# GH#31167 Without this patch, pickle doesn't know how to unpickle795# old DateOffset objects now that they are cdef classes.796group = self.get_node(key)797if group is None:798raise KeyError(f"No object named {key} in the file")799return self._read_group(group)800801def select(802self,803key: str,804where=None,805start=None,806stop=None,807columns=None,808iterator=False,809chunksize=None,810auto_close: bool = False,811):812"""813Retrieve pandas object stored in file, optionally based on where criteria.814815.. warning::816817Pandas uses PyTables for reading and writing HDF5 files, which allows818serializing object-dtype data with pickle when using the "fixed" format.819Loading pickled data received from untrusted sources can be unsafe.820821See: https://docs.python.org/3/library/pickle.html for more.822823Parameters824----------825key : str826Object being retrieved from file.827where : list or None828List of Term (or convertible) objects, optional.829start : int or None830Row number to start selection.831stop : int, default None832Row number to stop selection.833columns : list or None834A list of columns that if not None, will limit the return columns.835iterator : bool or False836Returns an iterator.837chunksize : int or None838Number or rows to include in iteration, return an iterator.839auto_close : bool or False840Should automatically close the store when finished.841842Returns843-------844object845Retrieved object from file.846"""847group = self.get_node(key)848if group is None:849raise KeyError(f"No object named {key} in the file")850851# create the storer and axes852where = _ensure_term(where, scope_level=1)853s = self._create_storer(group)854s.infer_axes()855856# function to call on iteration857def func(_start, _stop, _where):858return s.read(start=_start, stop=_stop, where=_where, columns=columns)859860# create the iterator861it = TableIterator(862self,863s,864func,865where=where,866nrows=s.nrows,867start=start,868stop=stop,869iterator=iterator,870chunksize=chunksize,871auto_close=auto_close,872)873874return it.get_result()875876def select_as_coordinates(877self,878key: str,879where=None,880start: int | None = None,881stop: int | None = None,882):883"""884return the selection as an Index885886.. warning::887888Pandas uses PyTables for reading and writing HDF5 files, which allows889serializing object-dtype data with pickle when using the "fixed" format.890Loading pickled data received from untrusted sources can be unsafe.891892See: https://docs.python.org/3/library/pickle.html for more.893894895Parameters896----------897key : str898where : list of Term (or convertible) objects, optional899start : integer (defaults to None), row number to start selection900stop : integer (defaults to None), row number to stop selection901"""902where = _ensure_term(where, scope_level=1)903tbl = self.get_storer(key)904if not isinstance(tbl, Table):905raise TypeError("can only read_coordinates with a table")906return tbl.read_coordinates(where=where, start=start, stop=stop)907908def select_column(909self,910key: str,911column: str,912start: int | None = None,913stop: int | None = None,914):915"""916return a single column from the table. This is generally only useful to917select an indexable918919.. warning::920921Pandas uses PyTables for reading and writing HDF5 files, which allows922serializing object-dtype data with pickle when using the "fixed" format.923Loading pickled data received from untrusted sources can be unsafe.924925See: https://docs.python.org/3/library/pickle.html for more.926927Parameters928----------929key : str930column : str931The column of interest.932start : int or None, default None933stop : int or None, default None934935Raises936------937raises KeyError if the column is not found (or key is not a valid938store)939raises ValueError if the column can not be extracted individually (it940is part of a data block)941942"""943tbl = self.get_storer(key)944if not isinstance(tbl, Table):945raise TypeError("can only read_column with a table")946return tbl.read_column(column=column, start=start, stop=stop)947948def select_as_multiple(949self,950keys,951where=None,952selector=None,953columns=None,954start=None,955stop=None,956iterator=False,957chunksize=None,958auto_close: bool = False,959):960"""961Retrieve pandas objects from multiple tables.962963.. warning::964965Pandas uses PyTables for reading and writing HDF5 files, which allows966serializing object-dtype data with pickle when using the "fixed" format.967Loading pickled data received from untrusted sources can be unsafe.968969See: https://docs.python.org/3/library/pickle.html for more.970971Parameters972----------973keys : a list of the tables974selector : the table to apply the where criteria (defaults to keys[0]975if not supplied)976columns : the columns I want back977start : integer (defaults to None), row number to start selection978stop : integer (defaults to None), row number to stop selection979iterator : bool, return an iterator, default False980chunksize : nrows to include in iteration, return an iterator981auto_close : bool, default False982Should automatically close the store when finished.983984Raises985------986raises KeyError if keys or selector is not found or keys is empty987raises TypeError if keys is not a list or tuple988raises ValueError if the tables are not ALL THE SAME DIMENSIONS989"""990# default to single select991where = _ensure_term(where, scope_level=1)992if isinstance(keys, (list, tuple)) and len(keys) == 1:993keys = keys[0]994if isinstance(keys, str):995return self.select(996key=keys,997where=where,998columns=columns,999start=start,1000stop=stop,1001iterator=iterator,1002chunksize=chunksize,1003auto_close=auto_close,1004)10051006if not isinstance(keys, (list, tuple)):1007raise TypeError("keys must be a list/tuple")10081009if not len(keys):1010raise ValueError("keys must have a non-zero length")10111012if selector is None:1013selector = keys[0]10141015# collect the tables1016tbls = [self.get_storer(k) for k in keys]1017s = self.get_storer(selector)10181019# validate rows1020nrows = None1021for t, k in itertools.chain([(s, selector)], zip(tbls, keys)):1022if t is None:1023raise KeyError(f"Invalid table [{k}]")1024if not t.is_table:1025raise TypeError(1026f"object [{t.pathname}] is not a table, and cannot be used in all "1027"select as multiple"1028)10291030if nrows is None:1031nrows = t.nrows1032elif t.nrows != nrows:1033raise ValueError("all tables must have exactly the same nrows!")10341035# The isinstance checks here are redundant with the check above,1036# but necessary for mypy; see GH#297571037_tbls = [x for x in tbls if isinstance(x, Table)]10381039# axis is the concentration axes1040axis = list({t.non_index_axes[0][0] for t in _tbls})[0]10411042def func(_start, _stop, _where):10431044# retrieve the objs, _where is always passed as a set of1045# coordinates here1046objs = [1047t.read(where=_where, columns=columns, start=_start, stop=_stop)1048for t in tbls1049]10501051# concat and return1052return concat(objs, axis=axis, verify_integrity=False)._consolidate()10531054# create the iterator1055it = TableIterator(1056self,1057s,1058func,1059where=where,1060nrows=nrows,1061start=start,1062stop=stop,1063iterator=iterator,1064chunksize=chunksize,1065auto_close=auto_close,1066)10671068return it.get_result(coordinates=True)10691070def put(1071self,1072key: str,1073value: DataFrame | Series,1074format=None,1075index=True,1076append=False,1077complib=None,1078complevel: int | None = None,1079min_itemsize: int | dict[str, int] | None = None,1080nan_rep=None,1081data_columns: Literal[True] | list[str] | None = None,1082encoding=None,1083errors: str = "strict",1084track_times: bool = True,1085dropna: bool = False,1086):1087"""1088Store object in HDFStore.10891090Parameters1091----------1092key : str1093value : {Series, DataFrame}1094format : 'fixed(f)|table(t)', default is 'fixed'1095Format to use when storing object in HDFStore. Value can be one of:10961097``'fixed'``1098Fixed format. Fast writing/reading. Not-appendable, nor searchable.1099``'table'``1100Table format. Write as a PyTables Table structure which may perform1101worse but allow more flexible operations like searching / selecting1102subsets of the data.1103append : bool, default False1104This will force Table format, append the input data to the existing.1105data_columns : list of columns or True, default None1106List of columns to create as data columns, or True to use all columns.1107See `here1108<https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.1109encoding : str, default None1110Provide an encoding for strings.1111track_times : bool, default True1112Parameter is propagated to 'create_table' method of 'PyTables'.1113If set to False it enables to have the same h5 files (same hashes)1114independent on creation time.11151116.. versionadded:: 1.1.01117"""1118if format is None:1119format = get_option("io.hdf.default_format") or "fixed"1120format = self._validate_format(format)1121self._write_to_group(1122key,1123value,1124format=format,1125index=index,1126append=append,1127complib=complib,1128complevel=complevel,1129min_itemsize=min_itemsize,1130nan_rep=nan_rep,1131data_columns=data_columns,1132encoding=encoding,1133errors=errors,1134track_times=track_times,1135dropna=dropna,1136)11371138def remove(self, key: str, where=None, start=None, stop=None):1139"""1140Remove pandas object partially by specifying the where condition11411142Parameters1143----------1144key : str1145Node to remove or delete rows from1146where : list of Term (or convertible) objects, optional1147start : integer (defaults to None), row number to start selection1148stop : integer (defaults to None), row number to stop selection11491150Returns1151-------1152number of rows removed (or None if not a Table)11531154Raises1155------1156raises KeyError if key is not a valid store11571158"""1159where = _ensure_term(where, scope_level=1)1160try:1161s = self.get_storer(key)1162except KeyError:1163# the key is not a valid store, re-raising KeyError1164raise1165except AssertionError:1166# surface any assertion errors for e.g. debugging1167raise1168except Exception as err:1169# In tests we get here with ClosedFileError, TypeError, and1170# _table_mod.NoSuchNodeError. TODO: Catch only these?11711172if where is not None:1173raise ValueError(1174"trying to remove a node with a non-None where clause!"1175) from err11761177# we are actually trying to remove a node (with children)1178node = self.get_node(key)1179if node is not None:1180node._f_remove(recursive=True)1181return None11821183# remove the node1184if com.all_none(where, start, stop):1185s.group._f_remove(recursive=True)11861187# delete from the table1188else:1189if not s.is_table:1190raise ValueError(1191"can only remove with where on objects written as tables"1192)1193return s.delete(where=where, start=start, stop=stop)11941195def append(1196self,1197key: str,1198value: DataFrame | Series,1199format=None,1200axes=None,1201index=True,1202append=True,1203complib=None,1204complevel: int | None = None,1205columns=None,1206min_itemsize: int | dict[str, int] | None = None,1207nan_rep=None,1208chunksize=None,1209expectedrows=None,1210dropna: bool | None = None,1211data_columns: Literal[True] | list[str] | None = None,1212encoding=None,1213errors: str = "strict",1214):1215"""1216Append to Table in file. Node must already exist and be Table1217format.12181219Parameters1220----------1221key : str1222value : {Series, DataFrame}1223format : 'table' is the default1224Format to use when storing object in HDFStore. Value can be one of:12251226``'table'``1227Table format. Write as a PyTables Table structure which may perform1228worse but allow more flexible operations like searching / selecting1229subsets of the data.1230append : bool, default True1231Append the input data to the existing.1232data_columns : list of columns, or True, default None1233List of columns to create as indexed data columns for on-disk1234queries, or True to use all columns. By default only the axes1235of the object are indexed. See `here1236<https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.1237min_itemsize : dict of columns that specify minimum str sizes1238nan_rep : str to use as str nan representation1239chunksize : size to chunk the writing1240expectedrows : expected TOTAL row size of this table1241encoding : default None, provide an encoding for str1242dropna : bool, default False1243Do not write an ALL nan row to the store settable1244by the option 'io.hdf.dropna_table'.12451246Notes1247-----1248Does *not* check if data being appended overlaps with existing1249data in the table, so be careful1250"""1251if columns is not None:1252raise TypeError(1253"columns is not a supported keyword in append, try data_columns"1254)12551256if dropna is None:1257dropna = get_option("io.hdf.dropna_table")1258if format is None:1259format = get_option("io.hdf.default_format") or "table"1260format = self._validate_format(format)1261self._write_to_group(1262key,1263value,1264format=format,1265axes=axes,1266index=index,1267append=append,1268complib=complib,1269complevel=complevel,1270min_itemsize=min_itemsize,1271nan_rep=nan_rep,1272chunksize=chunksize,1273expectedrows=expectedrows,1274dropna=dropna,1275data_columns=data_columns,1276encoding=encoding,1277errors=errors,1278)12791280def append_to_multiple(1281self,1282d: dict,1283value,1284selector,1285data_columns=None,1286axes=None,1287dropna=False,1288**kwargs,1289):1290"""1291Append to multiple tables12921293Parameters1294----------1295d : a dict of table_name to table_columns, None is acceptable as the1296values of one node (this will get all the remaining columns)1297value : a pandas object1298selector : a string that designates the indexable table; all of its1299columns will be designed as data_columns, unless data_columns is1300passed, in which case these are used1301data_columns : list of columns to create as data columns, or True to1302use all columns1303dropna : if evaluates to True, drop rows from all tables if any single1304row in each table has all NaN. Default False.13051306Notes1307-----1308axes parameter is currently not accepted13091310"""1311if axes is not None:1312raise TypeError(1313"axes is currently not accepted as a parameter to append_to_multiple; "1314"you can create the tables independently instead"1315)13161317if not isinstance(d, dict):1318raise ValueError(1319"append_to_multiple must have a dictionary specified as the "1320"way to split the value"1321)13221323if selector not in d:1324raise ValueError(1325"append_to_multiple requires a selector that is in passed dict"1326)13271328# figure out the splitting axis (the non_index_axis)1329axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0]13301331# figure out how to split the value1332remain_key = None1333remain_values: list = []1334for k, v in d.items():1335if v is None:1336if remain_key is not None:1337raise ValueError(1338"append_to_multiple can only have one value in d that is None"1339)1340remain_key = k1341else:1342remain_values.extend(v)1343if remain_key is not None:1344ordered = value.axes[axis]1345ordd = ordered.difference(Index(remain_values))1346ordd = sorted(ordered.get_indexer(ordd))1347d[remain_key] = ordered.take(ordd)13481349# data_columns1350if data_columns is None:1351data_columns = d[selector]13521353# ensure rows are synchronized across the tables1354if dropna:1355idxs = (value[cols].dropna(how="all").index for cols in d.values())1356valid_index = next(idxs)1357for index in idxs:1358valid_index = valid_index.intersection(index)1359value = value.loc[valid_index]13601361min_itemsize = kwargs.pop("min_itemsize", None)13621363# append1364for k, v in d.items():1365dc = data_columns if k == selector else None13661367# compute the val1368val = value.reindex(v, axis=axis)13691370filtered = (1371{key: value for (key, value) in min_itemsize.items() if key in v}1372if min_itemsize is not None1373else None1374)1375self.append(k, val, data_columns=dc, min_itemsize=filtered, **kwargs)13761377def create_table_index(1378self,1379key: str,1380columns=None,1381optlevel: int | None = None,1382kind: str | None = None,1383):1384"""1385Create a pytables index on the table.13861387Parameters1388----------1389key : str1390columns : None, bool, or listlike[str]1391Indicate which columns to create an index on.13921393* False : Do not create any indexes.1394* True : Create indexes on all columns.1395* None : Create indexes on all columns.1396* listlike : Create indexes on the given columns.13971398optlevel : int or None, default None1399Optimization level, if None, pytables defaults to 6.1400kind : str or None, default None1401Kind of index, if None, pytables defaults to "medium".14021403Raises1404------1405TypeError: raises if the node is not a table1406"""1407# version requirements1408_tables()1409s = self.get_storer(key)1410if s is None:1411return14121413if not isinstance(s, Table):1414raise TypeError("cannot create table index on a Fixed format store")1415s.create_index(columns=columns, optlevel=optlevel, kind=kind)14161417def groups(self):1418"""1419Return a list of all the top-level nodes.14201421Each node returned is not a pandas storage object.14221423Returns1424-------1425list1426List of objects.1427"""1428_tables()1429self._check_if_open()1430assert self._handle is not None # for mypy1431assert _table_mod is not None # for mypy1432return [1433g1434for g in self._handle.walk_groups()1435if (1436not isinstance(g, _table_mod.link.Link)1437and (1438getattr(g._v_attrs, "pandas_type", None)1439or getattr(g, "table", None)1440or (isinstance(g, _table_mod.table.Table) and g._v_name != "table")1441)1442)1443]14441445def walk(self, where="/"):1446"""1447Walk the pytables group hierarchy for pandas objects.14481449This generator will yield the group path, subgroups and pandas object1450names for each group.14511452Any non-pandas PyTables objects that are not a group will be ignored.14531454The `where` group itself is listed first (preorder), then each of its1455child groups (following an alphanumerical order) is also traversed,1456following the same procedure.14571458Parameters1459----------1460where : str, default "/"1461Group where to start walking.14621463Yields1464------1465path : str1466Full path to a group (without trailing '/').1467groups : list1468Names (strings) of the groups contained in `path`.1469leaves : list1470Names (strings) of the pandas objects contained in `path`.1471"""1472_tables()1473self._check_if_open()1474assert self._handle is not None # for mypy1475assert _table_mod is not None # for mypy14761477for g in self._handle.walk_groups(where):1478if getattr(g._v_attrs, "pandas_type", None) is not None:1479continue14801481groups = []1482leaves = []1483for child in g._v_children.values():1484pandas_type = getattr(child._v_attrs, "pandas_type", None)1485if pandas_type is None:1486if isinstance(child, _table_mod.group.Group):1487groups.append(child._v_name)1488else:1489leaves.append(child._v_name)14901491yield (g._v_pathname.rstrip("/"), groups, leaves)14921493def get_node(self, key: str) -> Node | None:1494"""return the node with the key or None if it does not exist"""1495self._check_if_open()1496if not key.startswith("/"):1497key = "/" + key14981499assert self._handle is not None1500assert _table_mod is not None # for mypy1501try:1502node = self._handle.get_node(self.root, key)1503except _table_mod.exceptions.NoSuchNodeError:1504return None15051506assert isinstance(node, _table_mod.Node), type(node)1507return node15081509def get_storer(self, key: str) -> GenericFixed | Table:1510"""return the storer object for a key, raise if not in the file"""1511group = self.get_node(key)1512if group is None:1513raise KeyError(f"No object named {key} in the file")15141515s = self._create_storer(group)1516s.infer_axes()1517return s15181519def copy(1520self,1521file,1522mode="w",1523propindexes: bool = True,1524keys=None,1525complib=None,1526complevel: int | None = None,1527fletcher32: bool = False,1528overwrite=True,1529):1530"""1531Copy the existing store to a new file, updating in place.15321533Parameters1534----------1535propindexes : bool, default True1536Restore indexes in copied file.1537keys : list, optional1538List of keys to include in the copy (defaults to all).1539overwrite : bool, default True1540Whether to overwrite (remove and replace) existing nodes in the new store.1541mode, complib, complevel, fletcher32 same as in HDFStore.__init__15421543Returns1544-------1545open file handle of the new store1546"""1547new_store = HDFStore(1548file, mode=mode, complib=complib, complevel=complevel, fletcher32=fletcher321549)1550if keys is None:1551keys = list(self.keys())1552if not isinstance(keys, (tuple, list)):1553keys = [keys]1554for k in keys:1555s = self.get_storer(k)1556if s is not None:15571558if k in new_store:1559if overwrite:1560new_store.remove(k)15611562data = self.select(k)1563if isinstance(s, Table):15641565index: bool | list[str] = False1566if propindexes:1567index = [a.name for a in s.axes if a.is_indexed]1568new_store.append(1569k,1570data,1571index=index,1572data_columns=getattr(s, "data_columns", None),1573encoding=s.encoding,1574)1575else:1576new_store.put(k, data, encoding=s.encoding)15771578return new_store15791580def info(self) -> str:1581"""1582Print detailed information on the store.15831584Returns1585-------1586str1587"""1588path = pprint_thing(self._path)1589output = f"{type(self)}\nFile path: {path}\n"15901591if self.is_open:1592lkeys = sorted(self.keys())1593if len(lkeys):1594keys = []1595values = []15961597for k in lkeys:1598try:1599s = self.get_storer(k)1600if s is not None:1601keys.append(pprint_thing(s.pathname or k))1602values.append(pprint_thing(s or "invalid_HDFStore node"))1603except AssertionError:1604# surface any assertion errors for e.g. debugging1605raise1606except Exception as detail:1607keys.append(k)1608dstr = pprint_thing(detail)1609values.append(f"[invalid_HDFStore node: {dstr}]")16101611output += adjoin(12, keys, values)1612else:1613output += "Empty"1614else:1615output += "File is CLOSED"16161617return output16181619# ------------------------------------------------------------------------1620# private methods16211622def _check_if_open(self):1623if not self.is_open:1624raise ClosedFileError(f"{self._path} file is not open!")16251626def _validate_format(self, format: str) -> str:1627"""validate / deprecate formats"""1628# validate1629try:1630format = _FORMAT_MAP[format.lower()]1631except KeyError as err:1632raise TypeError(f"invalid HDFStore format specified [{format}]") from err16331634return format16351636def _create_storer(1637self,1638group,1639format=None,1640value: DataFrame | Series | None = None,1641encoding: str = "UTF-8",1642errors: str = "strict",1643) -> GenericFixed | Table:1644"""return a suitable class to operate"""1645cls: type[GenericFixed] | type[Table]16461647if value is not None and not isinstance(value, (Series, DataFrame)):1648raise TypeError("value must be None, Series, or DataFrame")16491650def error(t):1651# return instead of raising so mypy can tell where we are raising1652return TypeError(1653f"cannot properly create the storer for: [{t}] [group->"1654f"{group},value->{type(value)},format->{format}"1655)16561657pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None))1658tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None))16591660# infer the pt from the passed value1661if pt is None:1662if value is None:1663_tables()1664assert _table_mod is not None # for mypy1665if getattr(group, "table", None) or isinstance(1666group, _table_mod.table.Table1667):1668pt = "frame_table"1669tt = "generic_table"1670else:1671raise TypeError(1672"cannot create a storer if the object is not existing "1673"nor a value are passed"1674)1675else:1676if isinstance(value, Series):1677pt = "series"1678else:1679pt = "frame"16801681# we are actually a table1682if format == "table":1683pt += "_table"16841685# a storer node1686if "table" not in pt:1687_STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed}1688try:1689cls = _STORER_MAP[pt]1690except KeyError as err:1691raise error("_STORER_MAP") from err1692return cls(self, group, encoding=encoding, errors=errors)16931694# existing node (and must be a table)1695if tt is None:1696# if we are a writer, determine the tt1697if value is not None:1698if pt == "series_table":1699index = getattr(value, "index", None)1700if index is not None:1701if index.nlevels == 1:1702tt = "appendable_series"1703elif index.nlevels > 1:1704tt = "appendable_multiseries"1705elif pt == "frame_table":1706index = getattr(value, "index", None)1707if index is not None:1708if index.nlevels == 1:1709tt = "appendable_frame"1710elif index.nlevels > 1:1711tt = "appendable_multiframe"17121713_TABLE_MAP = {1714"generic_table": GenericTable,1715"appendable_series": AppendableSeriesTable,1716"appendable_multiseries": AppendableMultiSeriesTable,1717"appendable_frame": AppendableFrameTable,1718"appendable_multiframe": AppendableMultiFrameTable,1719"worm": WORMTable,1720}1721try:1722cls = _TABLE_MAP[tt]1723except KeyError as err:1724raise error("_TABLE_MAP") from err17251726return cls(self, group, encoding=encoding, errors=errors)17271728def _write_to_group(1729self,1730key: str,1731value: DataFrame | Series,1732format,1733axes=None,1734index=True,1735append=False,1736complib=None,1737complevel: int | None = None,1738fletcher32=None,1739min_itemsize: int | dict[str, int] | None = None,1740chunksize=None,1741expectedrows=None,1742dropna=False,1743nan_rep=None,1744data_columns=None,1745encoding=None,1746errors: str = "strict",1747track_times: bool = True,1748) -> None:1749# we don't want to store a table node at all if our object is 0-len1750# as there are not dtypes1751if getattr(value, "empty", None) and (format == "table" or append):1752return17531754group = self._identify_group(key, append)17551756s = self._create_storer(group, format, value, encoding=encoding, errors=errors)1757if append:1758# raise if we are trying to append to a Fixed format,1759# or a table that exists (and we are putting)1760if not s.is_table or (s.is_table and format == "fixed" and s.is_exists):1761raise ValueError("Can only append to Tables")1762if not s.is_exists:1763s.set_object_info()1764else:1765s.set_object_info()17661767if not s.is_table and complib:1768raise ValueError("Compression not supported on Fixed format stores")17691770# write the object1771s.write(1772obj=value,1773axes=axes,1774append=append,1775complib=complib,1776complevel=complevel,1777fletcher32=fletcher32,1778min_itemsize=min_itemsize,1779chunksize=chunksize,1780expectedrows=expectedrows,1781dropna=dropna,1782nan_rep=nan_rep,1783data_columns=data_columns,1784track_times=track_times,1785)17861787if isinstance(s, Table) and index:1788s.create_index(columns=index)17891790def _read_group(self, group: Node):1791s = self._create_storer(group)1792s.infer_axes()1793return s.read()17941795def _identify_group(self, key: str, append: bool) -> Node:1796"""Identify HDF5 group based on key, delete/create group if needed."""1797group = self.get_node(key)17981799# we make this assertion for mypy; the get_node call will already1800# have raised if this is incorrect1801assert self._handle is not None18021803# remove the node if we are not appending1804if group is not None and not append:1805self._handle.remove_node(group, recursive=True)1806group = None18071808if group is None:1809group = self._create_nodes_and_group(key)18101811return group18121813def _create_nodes_and_group(self, key: str) -> Node:1814"""Create nodes from key and return group name."""1815# assertion for mypy1816assert self._handle is not None18171818paths = key.split("/")1819# recursively create the groups1820path = "/"1821for p in paths:1822if not len(p):1823continue1824new_path = path1825if not path.endswith("/"):1826new_path += "/"1827new_path += p1828group = self.get_node(new_path)1829if group is None:1830group = self._handle.create_group(path, p)1831path = new_path1832return group183318341835class TableIterator:1836"""1837Define the iteration interface on a table18381839Parameters1840----------1841store : HDFStore1842s : the referred storer1843func : the function to execute the query1844where : the where of the query1845nrows : the rows to iterate on1846start : the passed start value (default is None)1847stop : the passed stop value (default is None)1848iterator : bool, default False1849Whether to use the default iterator.1850chunksize : the passed chunking value (default is 100000)1851auto_close : bool, default False1852Whether to automatically close the store at the end of iteration.1853"""18541855chunksize: int | None1856store: HDFStore1857s: GenericFixed | Table18581859def __init__(1860self,1861store: HDFStore,1862s: GenericFixed | Table,1863func,1864where,1865nrows,1866start=None,1867stop=None,1868iterator: bool = False,1869chunksize: int | None = None,1870auto_close: bool = False,1871):1872self.store = store1873self.s = s1874self.func = func1875self.where = where18761877# set start/stop if they are not set if we are a table1878if self.s.is_table:1879if nrows is None:1880nrows = 01881if start is None:1882start = 01883if stop is None:1884stop = nrows1885stop = min(nrows, stop)18861887self.nrows = nrows1888self.start = start1889self.stop = stop18901891self.coordinates = None1892if iterator or chunksize is not None:1893if chunksize is None:1894chunksize = 1000001895self.chunksize = int(chunksize)1896else:1897self.chunksize = None18981899self.auto_close = auto_close19001901def __iter__(self):1902# iterate1903current = self.start1904if self.coordinates is None:1905raise ValueError("Cannot iterate until get_result is called.")1906while current < self.stop:1907stop = min(current + self.chunksize, self.stop)1908value = self.func(None, None, self.coordinates[current:stop])1909current = stop1910if value is None or not len(value):1911continue19121913yield value19141915self.close()19161917def close(self):1918if self.auto_close:1919self.store.close()19201921def get_result(self, coordinates: bool = False):1922# return the actual iterator1923if self.chunksize is not None:1924if not isinstance(self.s, Table):1925raise TypeError("can only use an iterator or chunksize on a table")19261927self.coordinates = self.s.read_coordinates(where=self.where)19281929return self19301931# if specified read via coordinates (necessary for multiple selections1932if coordinates:1933if not isinstance(self.s, Table):1934raise TypeError("can only read_coordinates on a table")1935where = self.s.read_coordinates(1936where=self.where, start=self.start, stop=self.stop1937)1938else:1939where = self.where19401941# directly return the result1942results = self.func(self.start, self.stop, where)1943self.close()1944return results194519461947class IndexCol:1948"""1949an index column description class19501951Parameters1952----------1953axis : axis which I reference1954values : the ndarray like converted values1955kind : a string description of this type1956typ : the pytables type1957pos : the position in the pytables19581959"""19601961is_an_indexable = True1962is_data_indexable = True1963_info_fields = ["freq", "tz", "index_name"]19641965name: str1966cname: str19671968def __init__(1969self,1970name: str,1971values=None,1972kind=None,1973typ=None,1974cname: str | None = None,1975axis=None,1976pos=None,1977freq=None,1978tz=None,1979index_name=None,1980ordered=None,1981table=None,1982meta=None,1983metadata=None,1984):19851986if not isinstance(name, str):1987raise ValueError("`name` must be a str.")19881989self.values = values1990self.kind = kind1991self.typ = typ1992self.name = name1993self.cname = cname or name1994self.axis = axis1995self.pos = pos1996self.freq = freq1997self.tz = tz1998self.index_name = index_name1999self.ordered = ordered2000self.table = table2001self.meta = meta2002self.metadata = metadata20032004if pos is not None:2005self.set_pos(pos)20062007# These are ensured as long as the passed arguments match the2008# constructor annotations.2009assert isinstance(self.name, str)2010assert isinstance(self.cname, str)20112012@property2013def itemsize(self) -> int:2014# Assumes self.typ has already been initialized2015return self.typ.itemsize20162017@property2018def kind_attr(self) -> str:2019return f"{self.name}_kind"20202021def set_pos(self, pos: int):2022"""set the position of this column in the Table"""2023self.pos = pos2024if pos is not None and self.typ is not None:2025self.typ._v_pos = pos20262027def __repr__(self) -> str:2028temp = tuple(2029map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind))2030)2031return ",".join(2032[2033f"{key}->{value}"2034for key, value in zip(["name", "cname", "axis", "pos", "kind"], temp)2035]2036)20372038def __eq__(self, other: Any) -> bool:2039"""compare 2 col items"""2040return all(2041getattr(self, a, None) == getattr(other, a, None)2042for a in ["name", "cname", "axis", "pos"]2043)20442045def __ne__(self, other) -> bool:2046return not self.__eq__(other)20472048@property2049def is_indexed(self) -> bool:2050"""return whether I am an indexed column"""2051if not hasattr(self.table, "cols"):2052# e.g. if infer hasn't been called yet, self.table will be None.2053return False2054return getattr(self.table.cols, self.cname).is_indexed20552056def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):2057"""2058Convert the data from this selection to the appropriate pandas type.2059"""2060assert isinstance(values, np.ndarray), type(values)20612062# values is a recarray2063if values.dtype.fields is not None:2064values = values[self.cname]20652066val_kind = _ensure_decoded(self.kind)2067values = _maybe_convert(values, val_kind, encoding, errors)20682069kwargs = {}2070kwargs["name"] = _ensure_decoded(self.index_name)20712072if self.freq is not None:2073kwargs["freq"] = _ensure_decoded(self.freq)20742075factory: type[Index] | type[DatetimeIndex] = Index2076if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype):2077factory = DatetimeIndex2078elif values.dtype == "i8" and "freq" in kwargs:2079# PeriodIndex data is stored as i82080# error: Incompatible types in assignment (expression has type2081# "Callable[[Any, KwArg(Any)], PeriodIndex]", variable has type2082# "Union[Type[Index], Type[DatetimeIndex]]")2083factory = lambda x, **kwds: PeriodIndex( # type: ignore[assignment]2084ordinal=x, **kwds2085)20862087# making an Index instance could throw a number of different errors2088try:2089new_pd_index = factory(values, **kwargs)2090except ValueError:2091# if the output freq is different that what we recorded,2092# it should be None (see also 'doc example part 2')2093if "freq" in kwargs:2094kwargs["freq"] = None2095new_pd_index = factory(values, **kwargs)2096final_pd_index = _set_tz(new_pd_index, self.tz)2097return final_pd_index, final_pd_index20982099def take_data(self):2100"""return the values"""2101return self.values21022103@property2104def attrs(self):2105return self.table._v_attrs21062107@property2108def description(self):2109return self.table.description21102111@property2112def col(self):2113"""return my current col description"""2114return getattr(self.description, self.cname, None)21152116@property2117def cvalues(self):2118"""return my cython values"""2119return self.values21202121def __iter__(self):2122return iter(self.values)21232124def maybe_set_size(self, min_itemsize=None):2125"""2126maybe set a string col itemsize:2127min_itemsize can be an integer or a dict with this columns name2128with an integer size2129"""2130if _ensure_decoded(self.kind) == "string":2131if isinstance(min_itemsize, dict):2132min_itemsize = min_itemsize.get(self.name)21332134if min_itemsize is not None and self.typ.itemsize < min_itemsize:2135self.typ = _tables().StringCol(itemsize=min_itemsize, pos=self.pos)21362137def validate_names(self):2138pass21392140def validate_and_set(self, handler: AppendableTable, append: bool):2141self.table = handler.table2142self.validate_col()2143self.validate_attr(append)2144self.validate_metadata(handler)2145self.write_metadata(handler)2146self.set_attr()21472148def validate_col(self, itemsize=None):2149"""validate this column: return the compared against itemsize"""2150# validate this column for string truncation (or reset to the max size)2151if _ensure_decoded(self.kind) == "string":2152c = self.col2153if c is not None:2154if itemsize is None:2155itemsize = self.itemsize2156if c.itemsize < itemsize:2157raise ValueError(2158f"Trying to store a string with len [{itemsize}] in "2159f"[{self.cname}] column but\nthis column has a limit of "2160f"[{c.itemsize}]!\nConsider using min_itemsize to "2161"preset the sizes on these columns"2162)2163return c.itemsize21642165return None21662167def validate_attr(self, append: bool):2168# check for backwards incompatibility2169if append:2170existing_kind = getattr(self.attrs, self.kind_attr, None)2171if existing_kind is not None and existing_kind != self.kind:2172raise TypeError(2173f"incompatible kind in col [{existing_kind} - {self.kind}]"2174)21752176def update_info(self, info):2177"""2178set/update the info for this indexable with the key/value2179if there is a conflict raise/warn as needed2180"""2181for key in self._info_fields:21822183value = getattr(self, key, None)2184idx = info.setdefault(self.name, {})21852186existing_value = idx.get(key)2187if key in idx and value is not None and existing_value != value:2188# frequency/name just warn2189if key in ["freq", "index_name"]:2190ws = attribute_conflict_doc % (key, existing_value, value)2191warnings.warn(2192ws, AttributeConflictWarning, stacklevel=find_stack_level()2193)21942195# reset2196idx[key] = None2197setattr(self, key, None)21982199else:2200raise ValueError(2201f"invalid info for [{self.name}] for [{key}], "2202f"existing_value [{existing_value}] conflicts with "2203f"new value [{value}]"2204)2205else:2206if value is not None or existing_value is not None:2207idx[key] = value22082209def set_info(self, info):2210"""set my state from the passed info"""2211idx = info.get(self.name)2212if idx is not None:2213self.__dict__.update(idx)22142215def set_attr(self):2216"""set the kind for this column"""2217setattr(self.attrs, self.kind_attr, self.kind)22182219def validate_metadata(self, handler: AppendableTable):2220"""validate that kind=category does not change the categories"""2221if self.meta == "category":2222new_metadata = self.metadata2223cur_metadata = handler.read_metadata(self.cname)2224if (2225new_metadata is not None2226and cur_metadata is not None2227and not array_equivalent(new_metadata, cur_metadata)2228):2229raise ValueError(2230"cannot append a categorical with "2231"different categories to the existing"2232)22332234def write_metadata(self, handler: AppendableTable):2235"""set the meta data"""2236if self.metadata is not None:2237handler.write_metadata(self.cname, self.metadata)223822392240class GenericIndexCol(IndexCol):2241"""an index which is not represented in the data of the table"""22422243@property2244def is_indexed(self) -> bool:2245return False22462247def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):2248"""2249Convert the data from this selection to the appropriate pandas type.22502251Parameters2252----------2253values : np.ndarray2254nan_rep : str2255encoding : str2256errors : str2257"""2258assert isinstance(values, np.ndarray), type(values)22592260# error: Incompatible types in assignment (expression has type2261# "Int64Index", variable has type "ndarray")2262values = Int64Index(np.arange(len(values))) # type: ignore[assignment]2263return values, values22642265def set_attr(self):2266pass226722682269class DataCol(IndexCol):2270"""2271a data holding column, by definition this is not indexable22722273Parameters2274----------2275data : the actual data2276cname : the column name in the table to hold the data (typically2277values)2278meta : a string description of the metadata2279metadata : the actual metadata2280"""22812282is_an_indexable = False2283is_data_indexable = False2284_info_fields = ["tz", "ordered"]22852286def __init__(2287self,2288name: str,2289values=None,2290kind=None,2291typ=None,2292cname=None,2293pos=None,2294tz=None,2295ordered=None,2296table=None,2297meta=None,2298metadata=None,2299dtype: DtypeArg | None = None,2300data=None,2301):2302super().__init__(2303name=name,2304values=values,2305kind=kind,2306typ=typ,2307pos=pos,2308cname=cname,2309tz=tz,2310ordered=ordered,2311table=table,2312meta=meta,2313metadata=metadata,2314)2315self.dtype = dtype2316self.data = data23172318@property2319def dtype_attr(self) -> str:2320return f"{self.name}_dtype"23212322@property2323def meta_attr(self) -> str:2324return f"{self.name}_meta"23252326def __repr__(self) -> str:2327temp = tuple(2328map(2329pprint_thing, (self.name, self.cname, self.dtype, self.kind, self.shape)2330)2331)2332return ",".join(2333[2334f"{key}->{value}"2335for key, value in zip(["name", "cname", "dtype", "kind", "shape"], temp)2336]2337)23382339def __eq__(self, other: Any) -> bool:2340"""compare 2 col items"""2341return all(2342getattr(self, a, None) == getattr(other, a, None)2343for a in ["name", "cname", "dtype", "pos"]2344)23452346def set_data(self, data: ArrayLike):2347assert data is not None2348assert self.dtype is None23492350data, dtype_name = _get_data_and_dtype_name(data)23512352self.data = data2353self.dtype = dtype_name2354self.kind = _dtype_to_kind(dtype_name)23552356def take_data(self):2357"""return the data"""2358return self.data23592360@classmethod2361def _get_atom(cls, values: ArrayLike) -> Col:2362"""2363Get an appropriately typed and shaped pytables.Col object for values.2364"""2365dtype = values.dtype2366# error: Item "ExtensionDtype" of "Union[ExtensionDtype, dtype[Any]]" has no2367# attribute "itemsize"2368itemsize = dtype.itemsize # type: ignore[union-attr]23692370shape = values.shape2371if values.ndim == 1:2372# EA, use block shape pretending it is 2D2373# TODO(EA2D): not necessary with 2D EAs2374shape = (1, values.size)23752376if isinstance(values, Categorical):2377codes = values.codes2378atom = cls.get_atom_data(shape, kind=codes.dtype.name)2379elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):2380atom = cls.get_atom_datetime64(shape)2381elif is_timedelta64_dtype(dtype):2382atom = cls.get_atom_timedelta64(shape)2383elif is_complex_dtype(dtype):2384atom = _tables().ComplexCol(itemsize=itemsize, shape=shape[0])2385elif is_string_dtype(dtype):2386atom = cls.get_atom_string(shape, itemsize)2387else:2388atom = cls.get_atom_data(shape, kind=dtype.name)23892390return atom23912392@classmethod2393def get_atom_string(cls, shape, itemsize):2394return _tables().StringCol(itemsize=itemsize, shape=shape[0])23952396@classmethod2397def get_atom_coltype(cls, kind: str) -> type[Col]:2398"""return the PyTables column class for this column"""2399if kind.startswith("uint"):2400k4 = kind[4:]2401col_name = f"UInt{k4}Col"2402elif kind.startswith("period"):2403# we store as integer2404col_name = "Int64Col"2405else:2406kcap = kind.capitalize()2407col_name = f"{kcap}Col"24082409return getattr(_tables(), col_name)24102411@classmethod2412def get_atom_data(cls, shape, kind: str) -> Col:2413return cls.get_atom_coltype(kind=kind)(shape=shape[0])24142415@classmethod2416def get_atom_datetime64(cls, shape):2417return _tables().Int64Col(shape=shape[0])24182419@classmethod2420def get_atom_timedelta64(cls, shape):2421return _tables().Int64Col(shape=shape[0])24222423@property2424def shape(self):2425return getattr(self.data, "shape", None)24262427@property2428def cvalues(self):2429"""return my cython values"""2430return self.data24312432def validate_attr(self, append):2433"""validate that we have the same order as the existing & same dtype"""2434if append:2435existing_fields = getattr(self.attrs, self.kind_attr, None)2436if existing_fields is not None and existing_fields != list(self.values):2437raise ValueError("appended items do not match existing items in table!")24382439existing_dtype = getattr(self.attrs, self.dtype_attr, None)2440if existing_dtype is not None and existing_dtype != self.dtype:2441raise ValueError(2442"appended items dtype do not match existing items dtype in table!"2443)24442445def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):2446"""2447Convert the data from this selection to the appropriate pandas type.24482449Parameters2450----------2451values : np.ndarray2452nan_rep :2453encoding : str2454errors : str24552456Returns2457-------2458index : listlike to become an Index2459data : ndarraylike to become a column2460"""2461assert isinstance(values, np.ndarray), type(values)24622463# values is a recarray2464if values.dtype.fields is not None:2465values = values[self.cname]24662467assert self.typ is not None2468if self.dtype is None:2469# Note: in tests we never have timedelta64 or datetime64,2470# so the _get_data_and_dtype_name may be unnecessary2471converted, dtype_name = _get_data_and_dtype_name(values)2472kind = _dtype_to_kind(dtype_name)2473else:2474converted = values2475dtype_name = self.dtype2476kind = self.kind24772478assert isinstance(converted, np.ndarray) # for mypy24792480# use the meta if needed2481meta = _ensure_decoded(self.meta)2482metadata = self.metadata2483ordered = self.ordered2484tz = self.tz24852486assert dtype_name is not None2487# convert to the correct dtype2488dtype = _ensure_decoded(dtype_name)24892490# reverse converts2491if dtype == "datetime64":2492# recreate with tz if indicated2493converted = _set_tz(converted, tz, coerce=True)24942495elif dtype == "timedelta64":2496converted = np.asarray(converted, dtype="m8[ns]")2497elif dtype == "date":2498try:2499converted = np.asarray(2500[date.fromordinal(v) for v in converted], dtype=object2501)2502except ValueError:2503converted = np.asarray(2504[date.fromtimestamp(v) for v in converted], dtype=object2505)25062507elif meta == "category":2508# we have a categorical2509categories = metadata2510codes = converted.ravel()25112512# if we have stored a NaN in the categories2513# then strip it; in theory we could have BOTH2514# -1s in the codes and nulls :<2515if categories is None:2516# Handle case of NaN-only categorical columns in which case2517# the categories are an empty array; when this is stored,2518# pytables cannot write a zero-len array, so on readback2519# the categories would be None and `read_hdf()` would fail.2520categories = Index([], dtype=np.float64)2521else:2522mask = isna(categories)2523if mask.any():2524categories = categories[~mask]2525codes[codes != -1] -= mask.astype(int).cumsum()._values25262527converted = Categorical.from_codes(2528codes, categories=categories, ordered=ordered2529)25302531else:25322533try:2534converted = converted.astype(dtype, copy=False)2535except TypeError:2536converted = converted.astype("O", copy=False)25372538# convert nans / decode2539if _ensure_decoded(kind) == "string":2540converted = _unconvert_string_array(2541converted, nan_rep=nan_rep, encoding=encoding, errors=errors2542)25432544return self.values, converted25452546def set_attr(self):2547"""set the data for this column"""2548setattr(self.attrs, self.kind_attr, self.values)2549setattr(self.attrs, self.meta_attr, self.meta)2550assert self.dtype is not None2551setattr(self.attrs, self.dtype_attr, self.dtype)255225532554class DataIndexableCol(DataCol):2555"""represent a data column that can be indexed"""25562557is_data_indexable = True25582559def validate_names(self):2560if not Index(self.values).is_object():2561# TODO: should the message here be more specifically non-str?2562raise ValueError("cannot have non-object label DataIndexableCol")25632564@classmethod2565def get_atom_string(cls, shape, itemsize):2566return _tables().StringCol(itemsize=itemsize)25672568@classmethod2569def get_atom_data(cls, shape, kind: str) -> Col:2570return cls.get_atom_coltype(kind=kind)()25712572@classmethod2573def get_atom_datetime64(cls, shape):2574return _tables().Int64Col()25752576@classmethod2577def get_atom_timedelta64(cls, shape):2578return _tables().Int64Col()257925802581class GenericDataIndexableCol(DataIndexableCol):2582"""represent a generic pytables data column"""25832584pass258525862587class Fixed:2588"""2589represent an object in my store2590facilitate read/write of various types of objects2591this is an abstract base class25922593Parameters2594----------2595parent : HDFStore2596group : Node2597The group node where the table resides.2598"""25992600pandas_kind: str2601format_type: str = "fixed" # GH#30962 needed by dask2602obj_type: type[DataFrame | Series]2603ndim: int2604encoding: str2605parent: HDFStore2606group: Node2607errors: str2608is_table = False26092610def __init__(2611self,2612parent: HDFStore,2613group: Node,2614encoding: str = "UTF-8",2615errors: str = "strict",2616):2617assert isinstance(parent, HDFStore), type(parent)2618assert _table_mod is not None # needed for mypy2619assert isinstance(group, _table_mod.Node), type(group)2620self.parent = parent2621self.group = group2622self.encoding = _ensure_encoding(encoding)2623self.errors = errors26242625@property2626def is_old_version(self) -> bool:2627return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 126282629@property2630def version(self) -> tuple[int, int, int]:2631"""compute and set our version"""2632version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None))2633try:2634version = tuple(int(x) for x in version.split("."))2635if len(version) == 2:2636version = version + (0,)2637except AttributeError:2638version = (0, 0, 0)2639return version26402641@property2642def pandas_type(self):2643return _ensure_decoded(getattr(self.group._v_attrs, "pandas_type", None))26442645def __repr__(self) -> str:2646"""return a pretty representation of myself"""2647self.infer_axes()2648s = self.shape2649if s is not None:2650if isinstance(s, (list, tuple)):2651jshape = ",".join([pprint_thing(x) for x in s])2652s = f"[{jshape}]"2653return f"{self.pandas_type:12.12} (shape->{s})"2654return self.pandas_type26552656def set_object_info(self):2657"""set my pandas type & version"""2658self.attrs.pandas_type = str(self.pandas_kind)2659self.attrs.pandas_version = str(_version)26602661def copy(self):2662new_self = copy.copy(self)2663return new_self26642665@property2666def shape(self):2667return self.nrows26682669@property2670def pathname(self):2671return self.group._v_pathname26722673@property2674def _handle(self):2675return self.parent._handle26762677@property2678def _filters(self):2679return self.parent._filters26802681@property2682def _complevel(self) -> int:2683return self.parent._complevel26842685@property2686def _fletcher32(self) -> bool:2687return self.parent._fletcher3226882689@property2690def attrs(self):2691return self.group._v_attrs26922693def set_attrs(self):2694"""set our object attributes"""2695pass26962697def get_attrs(self):2698"""get our object attributes"""2699pass27002701@property2702def storable(self):2703"""return my storable"""2704return self.group27052706@property2707def is_exists(self) -> bool:2708return False27092710@property2711def nrows(self):2712return getattr(self.storable, "nrows", None)27132714def validate(self, other):2715"""validate against an existing storable"""2716if other is None:2717return2718return True27192720def validate_version(self, where=None):2721"""are we trying to operate on an old version?"""2722return True27232724def infer_axes(self):2725"""2726infer the axes of my storer2727return a boolean indicating if we have a valid storer or not2728"""2729s = self.storable2730if s is None:2731return False2732self.get_attrs()2733return True27342735def read(2736self,2737where=None,2738columns=None,2739start: int | None = None,2740stop: int | None = None,2741):2742raise NotImplementedError(2743"cannot read on an abstract storer: subclasses should implement"2744)27452746def write(self, **kwargs):2747raise NotImplementedError(2748"cannot write on an abstract storer: subclasses should implement"2749)27502751def delete(self, where=None, start: int | None = None, stop: int | None = None):2752"""2753support fully deleting the node in its entirety (only) - where2754specification must be None2755"""2756if com.all_none(where, start, stop):2757self._handle.remove_node(self.group, recursive=True)2758return None27592760raise TypeError("cannot delete on an abstract storer")276127622763class GenericFixed(Fixed):2764"""a generified fixed version"""27652766_index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"}2767_reverse_index_map = {v: k for k, v in _index_type_map.items()}2768attributes: list[str] = []27692770# indexer helpers2771def _class_to_alias(self, cls) -> str:2772return self._index_type_map.get(cls, "")27732774def _alias_to_class(self, alias):2775if isinstance(alias, type): # pragma: no cover2776# compat: for a short period of time master stored types2777return alias2778return self._reverse_index_map.get(alias, Index)27792780def _get_index_factory(self, attrs):2781index_class = self._alias_to_class(2782_ensure_decoded(getattr(attrs, "index_class", ""))2783)27842785factory: Callable27862787if index_class == DatetimeIndex:27882789def f(values, freq=None, tz=None):2790# data are already in UTC, localize and convert if tz present2791dta = DatetimeArray._simple_new(values.values, freq=freq)2792result = DatetimeIndex._simple_new(dta, name=None)2793if tz is not None:2794result = result.tz_localize("UTC").tz_convert(tz)2795return result27962797factory = f2798elif index_class == PeriodIndex:27992800def f(values, freq=None, tz=None):2801parr = PeriodArray._simple_new(values, freq=freq)2802return PeriodIndex._simple_new(parr, name=None)28032804factory = f2805else:2806factory = index_class28072808kwargs = {}2809if "freq" in attrs:2810kwargs["freq"] = attrs["freq"]2811if index_class is Index:2812# DTI/PI would be gotten by _alias_to_class2813factory = TimedeltaIndex28142815if "tz" in attrs:2816if isinstance(attrs["tz"], bytes):2817# created by python22818kwargs["tz"] = attrs["tz"].decode("utf-8")2819else:2820# created by python32821kwargs["tz"] = attrs["tz"]2822assert index_class is DatetimeIndex # just checking28232824return factory, kwargs28252826def validate_read(self, columns, where):2827"""2828raise if any keywords are passed which are not-None2829"""2830if columns is not None:2831raise TypeError(2832"cannot pass a column specification when reading "2833"a Fixed format store. this store must be selected in its entirety"2834)2835if where is not None:2836raise TypeError(2837"cannot pass a where specification when reading "2838"from a Fixed format store. this store must be selected in its entirety"2839)28402841@property2842def is_exists(self) -> bool:2843return True28442845def set_attrs(self):2846"""set our object attributes"""2847self.attrs.encoding = self.encoding2848self.attrs.errors = self.errors28492850def get_attrs(self):2851"""retrieve our attributes"""2852self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None))2853self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict"))2854for n in self.attributes:2855setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None)))28562857def write(self, obj, **kwargs):2858self.set_attrs()28592860def read_array(self, key: str, start: int | None = None, stop: int | None = None):2861"""read an array for the specified node (off of group"""2862import tables28632864node = getattr(self.group, key)2865attrs = node._v_attrs28662867transposed = getattr(attrs, "transposed", False)28682869if isinstance(node, tables.VLArray):2870ret = node[0][start:stop]2871else:2872dtype = _ensure_decoded(getattr(attrs, "value_type", None))2873shape = getattr(attrs, "shape", None)28742875if shape is not None:2876# length 0 axis2877ret = np.empty(shape, dtype=dtype)2878else:2879ret = node[start:stop]28802881if dtype == "datetime64":2882# reconstruct a timezone if indicated2883tz = getattr(attrs, "tz", None)2884ret = _set_tz(ret, tz, coerce=True)28852886elif dtype == "timedelta64":2887ret = np.asarray(ret, dtype="m8[ns]")28882889if transposed:2890return ret.T2891else:2892return ret28932894def read_index(2895self, key: str, start: int | None = None, stop: int | None = None2896) -> Index:2897variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety"))28982899if variety == "multi":2900return self.read_multi_index(key, start=start, stop=stop)2901elif variety == "regular":2902node = getattr(self.group, key)2903index = self.read_index_node(node, start=start, stop=stop)2904return index2905else: # pragma: no cover2906raise TypeError(f"unrecognized index variety: {variety}")29072908def write_index(self, key: str, index: Index):2909if isinstance(index, MultiIndex):2910setattr(self.attrs, f"{key}_variety", "multi")2911self.write_multi_index(key, index)2912else:2913setattr(self.attrs, f"{key}_variety", "regular")2914converted = _convert_index("index", index, self.encoding, self.errors)29152916self.write_array(key, converted.values)29172918node = getattr(self.group, key)2919node._v_attrs.kind = converted.kind2920node._v_attrs.name = index.name29212922if isinstance(index, (DatetimeIndex, PeriodIndex)):2923node._v_attrs.index_class = self._class_to_alias(type(index))29242925if isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):2926node._v_attrs.freq = index.freq29272928if isinstance(index, DatetimeIndex) and index.tz is not None:2929node._v_attrs.tz = _get_tz(index.tz)29302931def write_multi_index(self, key: str, index: MultiIndex):2932setattr(self.attrs, f"{key}_nlevels", index.nlevels)29332934for i, (lev, level_codes, name) in enumerate(2935zip(index.levels, index.codes, index.names)2936):2937# write the level2938if is_extension_array_dtype(lev):2939raise NotImplementedError(2940"Saving a MultiIndex with an extension dtype is not supported."2941)2942level_key = f"{key}_level{i}"2943conv_level = _convert_index(level_key, lev, self.encoding, self.errors)2944self.write_array(level_key, conv_level.values)2945node = getattr(self.group, level_key)2946node._v_attrs.kind = conv_level.kind2947node._v_attrs.name = name29482949# write the name2950setattr(node._v_attrs, f"{key}_name{name}", name)29512952# write the labels2953label_key = f"{key}_label{i}"2954self.write_array(label_key, level_codes)29552956def read_multi_index(2957self, key: str, start: int | None = None, stop: int | None = None2958) -> MultiIndex:2959nlevels = getattr(self.attrs, f"{key}_nlevels")29602961levels = []2962codes = []2963names: list[Hashable] = []2964for i in range(nlevels):2965level_key = f"{key}_level{i}"2966node = getattr(self.group, level_key)2967lev = self.read_index_node(node, start=start, stop=stop)2968levels.append(lev)2969names.append(lev.name)29702971label_key = f"{key}_label{i}"2972level_codes = self.read_array(label_key, start=start, stop=stop)2973codes.append(level_codes)29742975return MultiIndex(2976levels=levels, codes=codes, names=names, verify_integrity=True2977)29782979def read_index_node(2980self, node: Node, start: int | None = None, stop: int | None = None2981) -> Index:2982data = node[start:stop]2983# If the index was an empty array write_array_empty() will2984# have written a sentinel. Here we replace it with the original.2985if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0:2986data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type)2987kind = _ensure_decoded(node._v_attrs.kind)2988name = None29892990if "name" in node._v_attrs:2991name = _ensure_str(node._v_attrs.name)2992name = _ensure_decoded(name)29932994attrs = node._v_attrs2995factory, kwargs = self._get_index_factory(attrs)29962997if kind == "date":2998index = factory(2999_unconvert_index(3000data, kind, encoding=self.encoding, errors=self.errors3001),3002dtype=object,3003**kwargs,3004)3005else:3006index = factory(3007_unconvert_index(3008data, kind, encoding=self.encoding, errors=self.errors3009),3010**kwargs,3011)30123013index.name = name30143015return index30163017def write_array_empty(self, key: str, value: ArrayLike):3018"""write a 0-len array"""3019# ugly hack for length 0 axes3020arr = np.empty((1,) * value.ndim)3021self._handle.create_array(self.group, key, arr)3022node = getattr(self.group, key)3023node._v_attrs.value_type = str(value.dtype)3024node._v_attrs.shape = value.shape30253026def write_array(3027self, key: str, obj: DataFrame | Series, items: Index | None = None3028) -> None:3029# TODO: we only have a few tests that get here, the only EA3030# that gets passed is DatetimeArray, and we never have3031# both self._filters and EA30323033value = extract_array(obj, extract_numpy=True)30343035if key in self.group:3036self._handle.remove_node(self.group, key)30373038# Transform needed to interface with pytables row/col notation3039empty_array = value.size == 03040transposed = False30413042if is_categorical_dtype(value.dtype):3043raise NotImplementedError(3044"Cannot store a category dtype in a HDF5 dataset that uses format="3045'"fixed". Use format="table".'3046)3047if not empty_array:3048if hasattr(value, "T"):3049# ExtensionArrays (1d) may not have transpose.3050value = value.T3051transposed = True30523053atom = None3054if self._filters is not None:3055with suppress(ValueError):3056# get the atom for this datatype3057atom = _tables().Atom.from_dtype(value.dtype)30583059if atom is not None:3060# We only get here if self._filters is non-None and3061# the Atom.from_dtype call succeeded30623063# create an empty chunked array and fill it from value3064if not empty_array:3065ca = self._handle.create_carray(3066self.group, key, atom, value.shape, filters=self._filters3067)3068ca[:] = value30693070else:3071self.write_array_empty(key, value)30723073elif value.dtype.type == np.object_:3074# infer the type, warn if we have a non-string type here (for3075# performance)3076inferred_type = lib.infer_dtype(value, skipna=False)3077if empty_array:3078pass3079elif inferred_type == "string":3080pass3081else:3082ws = performance_doc % (inferred_type, key, items)3083warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level())30843085vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom())3086vlarr.append(value)30873088elif is_datetime64_dtype(value.dtype):3089self._handle.create_array(self.group, key, value.view("i8"))3090getattr(self.group, key)._v_attrs.value_type = "datetime64"3091elif is_datetime64tz_dtype(value.dtype):3092# store as UTC3093# with a zone30943095# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no3096# attribute "asi8"3097self._handle.create_array(3098self.group, key, value.asi8 # type: ignore[union-attr]3099)31003101node = getattr(self.group, key)3102# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no3103# attribute "tz"3104node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr]3105node._v_attrs.value_type = "datetime64"3106elif is_timedelta64_dtype(value.dtype):3107self._handle.create_array(self.group, key, value.view("i8"))3108getattr(self.group, key)._v_attrs.value_type = "timedelta64"3109elif empty_array:3110self.write_array_empty(key, value)3111else:3112self._handle.create_array(self.group, key, value)31133114getattr(self.group, key)._v_attrs.transposed = transposed311531163117class SeriesFixed(GenericFixed):3118pandas_kind = "series"3119attributes = ["name"]31203121name: Hashable31223123@property3124def shape(self):3125try:3126return (len(self.group.values),)3127except (TypeError, AttributeError):3128return None31293130def read(3131self,3132where=None,3133columns=None,3134start: int | None = None,3135stop: int | None = None,3136):3137self.validate_read(columns, where)3138index = self.read_index("index", start=start, stop=stop)3139values = self.read_array("values", start=start, stop=stop)3140return Series(values, index=index, name=self.name)31413142def write(self, obj, **kwargs):3143super().write(obj, **kwargs)3144self.write_index("index", obj.index)3145self.write_array("values", obj)3146self.attrs.name = obj.name314731483149class BlockManagerFixed(GenericFixed):3150attributes = ["ndim", "nblocks"]31513152nblocks: int31533154@property3155def shape(self) -> Shape | None:3156try:3157ndim = self.ndim31583159# items3160items = 03161for i in range(self.nblocks):3162node = getattr(self.group, f"block{i}_items")3163shape = getattr(node, "shape", None)3164if shape is not None:3165items += shape[0]31663167# data shape3168node = self.group.block0_values3169shape = getattr(node, "shape", None)3170if shape is not None:3171shape = list(shape[0 : (ndim - 1)])3172else:3173shape = []31743175shape.append(items)31763177return shape3178except AttributeError:3179return None31803181def read(3182self,3183where=None,3184columns=None,3185start: int | None = None,3186stop: int | None = None,3187):3188# start, stop applied to rows, so 0th axis only3189self.validate_read(columns, where)3190select_axis = self.obj_type()._get_block_manager_axis(0)31913192axes = []3193for i in range(self.ndim):31943195_start, _stop = (start, stop) if i == select_axis else (None, None)3196ax = self.read_index(f"axis{i}", start=_start, stop=_stop)3197axes.append(ax)31983199items = axes[0]3200dfs = []32013202for i in range(self.nblocks):32033204blk_items = self.read_index(f"block{i}_items")3205values = self.read_array(f"block{i}_values", start=_start, stop=_stop)32063207columns = items[items.get_indexer(blk_items)]3208df = DataFrame(values.T, columns=columns, index=axes[1])3209dfs.append(df)32103211if len(dfs) > 0:3212out = concat(dfs, axis=1)3213out = out.reindex(columns=items, copy=False)3214return out32153216return DataFrame(columns=axes[0], index=axes[1])32173218def write(self, obj, **kwargs):3219super().write(obj, **kwargs)32203221# TODO(ArrayManager) HDFStore relies on accessing the blocks3222if isinstance(obj._mgr, ArrayManager):3223obj = obj._as_manager("block")32243225data = obj._mgr3226if not data.is_consolidated():3227data = data.consolidate()32283229self.attrs.ndim = data.ndim3230for i, ax in enumerate(data.axes):3231if i == 0 and (not ax.is_unique):3232raise ValueError("Columns index has to be unique for fixed format")3233self.write_index(f"axis{i}", ax)32343235# Supporting mixed-type DataFrame objects...nontrivial3236self.attrs.nblocks = len(data.blocks)3237for i, blk in enumerate(data.blocks):3238# I have no idea why, but writing values before items fixed #22993239blk_items = data.items.take(blk.mgr_locs)3240self.write_array(f"block{i}_values", blk.values, items=blk_items)3241self.write_index(f"block{i}_items", blk_items)324232433244class FrameFixed(BlockManagerFixed):3245pandas_kind = "frame"3246obj_type = DataFrame324732483249class Table(Fixed):3250"""3251represent a table:3252facilitate read/write of various types of tables32533254Attrs in Table Node3255-------------------3256These are attributes that are store in the main table node, they are3257necessary to recreate these tables when read back in.32583259index_axes : a list of tuples of the (original indexing axis and3260index column)3261non_index_axes: a list of tuples of the (original index axis and3262columns on a non-indexing axis)3263values_axes : a list of the columns which comprise the data of this3264table3265data_columns : a list of the columns that we are allowing indexing3266(these become single columns in values_axes)3267nan_rep : the string to use for nan representations for string3268objects3269levels : the names of levels3270metadata : the names of the metadata columns3271"""32723273pandas_kind = "wide_table"3274format_type: str = "table" # GH#30962 needed by dask3275table_type: str3276levels: int | list[Hashable] = 13277is_table = True32783279index_axes: list[IndexCol]3280non_index_axes: list[tuple[int, Any]]3281values_axes: list[DataCol]3282data_columns: list3283metadata: list3284info: dict32853286def __init__(3287self,3288parent: HDFStore,3289group: Node,3290encoding=None,3291errors: str = "strict",3292index_axes=None,3293non_index_axes=None,3294values_axes=None,3295data_columns=None,3296info=None,3297nan_rep=None,3298):3299super().__init__(parent, group, encoding=encoding, errors=errors)3300self.index_axes = index_axes or []3301self.non_index_axes = non_index_axes or []3302self.values_axes = values_axes or []3303self.data_columns = data_columns or []3304self.info = info or {}3305self.nan_rep = nan_rep33063307@property3308def table_type_short(self) -> str:3309return self.table_type.split("_")[0]33103311def __repr__(self) -> str:3312"""return a pretty representation of myself"""3313self.infer_axes()3314jdc = ",".join(self.data_columns) if len(self.data_columns) else ""3315dc = f",dc->[{jdc}]"33163317ver = ""3318if self.is_old_version:3319jver = ".".join([str(x) for x in self.version])3320ver = f"[{jver}]"33213322jindex_axes = ",".join([a.name for a in self.index_axes])3323return (3324f"{self.pandas_type:12.12}{ver} "3325f"(typ->{self.table_type_short},nrows->{self.nrows},"3326f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})"3327)33283329def __getitem__(self, c: str):3330"""return the axis for c"""3331for a in self.axes:3332if c == a.name:3333return a3334return None33353336def validate(self, other):3337"""validate against an existing table"""3338if other is None:3339return33403341if other.table_type != self.table_type:3342raise TypeError(3343"incompatible table_type with existing "3344f"[{other.table_type} - {self.table_type}]"3345)33463347for c in ["index_axes", "non_index_axes", "values_axes"]:3348sv = getattr(self, c, None)3349ov = getattr(other, c, None)3350if sv != ov:33513352# show the error for the specific axes3353# Argument 1 to "enumerate" has incompatible type3354# "Optional[Any]"; expected "Iterable[Any]" [arg-type]3355for i, sax in enumerate(sv): # type: ignore[arg-type]3356# Value of type "Optional[Any]" is not indexable [index]3357oax = ov[i] # type: ignore[index]3358if sax != oax:3359raise ValueError(3360f"invalid combination of [{c}] on appending data "3361f"[{sax}] vs current table [{oax}]"3362)33633364# should never get here3365raise Exception(3366f"invalid combination of [{c}] on appending data [{sv}] vs "3367f"current table [{ov}]"3368)33693370@property3371def is_multi_index(self) -> bool:3372"""the levels attribute is 1 or a list in the case of a multi-index"""3373return isinstance(self.levels, list)33743375def validate_multiindex(3376self, obj: DataFrame | Series3377) -> tuple[DataFrame, list[Hashable]]:3378"""3379validate that we can store the multi-index; reset and return the3380new object3381"""3382levels = com.fill_missing_names(obj.index.names)3383try:3384reset_obj = obj.reset_index()3385except ValueError as err:3386raise ValueError(3387"duplicate names/columns in the multi-index when storing as a table"3388) from err3389assert isinstance(reset_obj, DataFrame) # for mypy3390return reset_obj, levels33913392@property3393def nrows_expected(self) -> int:3394"""based on our axes, compute the expected nrows"""3395return np.prod([i.cvalues.shape[0] for i in self.index_axes])33963397@property3398def is_exists(self) -> bool:3399"""has this table been created"""3400return "table" in self.group34013402@property3403def storable(self):3404return getattr(self.group, "table", None)34053406@property3407def table(self):3408"""return the table group (this is my storable)"""3409return self.storable34103411@property3412def dtype(self):3413return self.table.dtype34143415@property3416def description(self):3417return self.table.description34183419@property3420def axes(self):3421return itertools.chain(self.index_axes, self.values_axes)34223423@property3424def ncols(self) -> int:3425"""the number of total columns in the values axes"""3426return sum(len(a.values) for a in self.values_axes)34273428@property3429def is_transposed(self) -> bool:3430return False34313432@property3433def data_orientation(self):3434"""return a tuple of my permutated axes, non_indexable at the front"""3435return tuple(3436itertools.chain(3437[int(a[0]) for a in self.non_index_axes],3438[int(a.axis) for a in self.index_axes],3439)3440)34413442def queryables(self) -> dict[str, Any]:3443"""return a dict of the kinds allowable columns for this object"""3444# mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here3445axis_names = {0: "index", 1: "columns"}34463447# compute the values_axes queryables3448d1 = [(a.cname, a) for a in self.index_axes]3449d2 = [(axis_names[axis], None) for axis, values in self.non_index_axes]3450d3 = [3451(v.cname, v) for v in self.values_axes if v.name in set(self.data_columns)3452]34533454# error: Unsupported operand types for + ("List[Tuple[str, IndexCol]]" and3455# "List[Tuple[str, None]]")3456return dict(d1 + d2 + d3) # type: ignore[operator]34573458def index_cols(self):3459"""return a list of my index cols"""3460# Note: each `i.cname` below is assured to be a str.3461return [(i.axis, i.cname) for i in self.index_axes]34623463def values_cols(self) -> list[str]:3464"""return a list of my values cols"""3465return [i.cname for i in self.values_axes]34663467def _get_metadata_path(self, key: str) -> str:3468"""return the metadata pathname for this key"""3469group = self.group._v_pathname3470return f"{group}/meta/{key}/meta"34713472def write_metadata(self, key: str, values: np.ndarray):3473"""3474Write out a metadata array to the key as a fixed-format Series.34753476Parameters3477----------3478key : str3479values : ndarray3480"""3481self.parent.put(3482self._get_metadata_path(key),3483Series(values),3484format="table",3485encoding=self.encoding,3486errors=self.errors,3487nan_rep=self.nan_rep,3488)34893490def read_metadata(self, key: str):3491"""return the meta data array for this key"""3492if getattr(getattr(self.group, "meta", None), key, None) is not None:3493return self.parent.select(self._get_metadata_path(key))3494return None34953496def set_attrs(self):3497"""set our table type & indexables"""3498self.attrs.table_type = str(self.table_type)3499self.attrs.index_cols = self.index_cols()3500self.attrs.values_cols = self.values_cols()3501self.attrs.non_index_axes = self.non_index_axes3502self.attrs.data_columns = self.data_columns3503self.attrs.nan_rep = self.nan_rep3504self.attrs.encoding = self.encoding3505self.attrs.errors = self.errors3506self.attrs.levels = self.levels3507self.attrs.info = self.info35083509def get_attrs(self):3510"""retrieve our attributes"""3511self.non_index_axes = getattr(self.attrs, "non_index_axes", None) or []3512self.data_columns = getattr(self.attrs, "data_columns", None) or []3513self.info = getattr(self.attrs, "info", None) or {}3514self.nan_rep = getattr(self.attrs, "nan_rep", None)3515self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None))3516self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict"))3517self.levels: list[Hashable] = getattr(self.attrs, "levels", None) or []3518self.index_axes = [a for a in self.indexables if a.is_an_indexable]3519self.values_axes = [a for a in self.indexables if not a.is_an_indexable]35203521def validate_version(self, where=None):3522"""are we trying to operate on an old version?"""3523if where is not None:3524if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1:3525ws = incompatibility_doc % ".".join([str(x) for x in self.version])3526warnings.warn(ws, IncompatibilityWarning)35273528def validate_min_itemsize(self, min_itemsize):3529"""3530validate the min_itemsize doesn't contain items that are not in the3531axes this needs data_columns to be defined3532"""3533if min_itemsize is None:3534return3535if not isinstance(min_itemsize, dict):3536return35373538q = self.queryables()3539for k in min_itemsize:35403541# ok, apply generally3542if k == "values":3543continue3544if k not in q:3545raise ValueError(3546f"min_itemsize has the key [{k}] which is not an axis or "3547"data_column"3548)35493550@cache_readonly3551def indexables(self):3552"""create/cache the indexables if they don't exist"""3553_indexables = []35543555desc = self.description3556table_attrs = self.table.attrs35573558# Note: each of the `name` kwargs below are str, ensured3559# by the definition in index_cols.3560# index columns3561for i, (axis, name) in enumerate(self.attrs.index_cols):3562atom = getattr(desc, name)3563md = self.read_metadata(name)3564meta = "category" if md is not None else None35653566kind_attr = f"{name}_kind"3567kind = getattr(table_attrs, kind_attr, None)35683569index_col = IndexCol(3570name=name,3571axis=axis,3572pos=i,3573kind=kind,3574typ=atom,3575table=self.table,3576meta=meta,3577metadata=md,3578)3579_indexables.append(index_col)35803581# values columns3582dc = set(self.data_columns)3583base_pos = len(_indexables)35843585def f(i, c):3586assert isinstance(c, str)3587klass = DataCol3588if c in dc:3589klass = DataIndexableCol35903591atom = getattr(desc, c)3592adj_name = _maybe_adjust_name(c, self.version)35933594# TODO: why kind_attr here?3595values = getattr(table_attrs, f"{adj_name}_kind", None)3596dtype = getattr(table_attrs, f"{adj_name}_dtype", None)3597# Argument 1 to "_dtype_to_kind" has incompatible type3598# "Optional[Any]"; expected "str" [arg-type]3599kind = _dtype_to_kind(dtype) # type: ignore[arg-type]36003601md = self.read_metadata(c)3602# TODO: figure out why these two versions of `meta` dont always match.3603# meta = "category" if md is not None else None3604meta = getattr(table_attrs, f"{adj_name}_meta", None)36053606obj = klass(3607name=adj_name,3608cname=c,3609values=values,3610kind=kind,3611pos=base_pos + i,3612typ=atom,3613table=self.table,3614meta=meta,3615metadata=md,3616dtype=dtype,3617)3618return obj36193620# Note: the definition of `values_cols` ensures that each3621# `c` below is a str.3622_indexables.extend([f(i, c) for i, c in enumerate(self.attrs.values_cols)])36233624return _indexables36253626def create_index(self, columns=None, optlevel=None, kind: str | None = None):3627"""3628Create a pytables index on the specified columns.36293630Parameters3631----------3632columns : None, bool, or listlike[str]3633Indicate which columns to create an index on.36343635* False : Do not create any indexes.3636* True : Create indexes on all columns.3637* None : Create indexes on all columns.3638* listlike : Create indexes on the given columns.36393640optlevel : int or None, default None3641Optimization level, if None, pytables defaults to 6.3642kind : str or None, default None3643Kind of index, if None, pytables defaults to "medium".36443645Raises3646------3647TypeError if trying to create an index on a complex-type column.36483649Notes3650-----3651Cannot index Time64Col or ComplexCol.3652Pytables must be >= 3.0.3653"""3654if not self.infer_axes():3655return3656if columns is False:3657return36583659# index all indexables and data_columns3660if columns is None or columns is True:3661columns = [a.cname for a in self.axes if a.is_data_indexable]3662if not isinstance(columns, (tuple, list)):3663columns = [columns]36643665kw = {}3666if optlevel is not None:3667kw["optlevel"] = optlevel3668if kind is not None:3669kw["kind"] = kind36703671table = self.table3672for c in columns:3673v = getattr(table.cols, c, None)3674if v is not None:3675# remove the index if the kind/optlevel have changed3676if v.is_indexed:3677index = v.index3678cur_optlevel = index.optlevel3679cur_kind = index.kind36803681if kind is not None and cur_kind != kind:3682v.remove_index()3683else:3684kw["kind"] = cur_kind36853686if optlevel is not None and cur_optlevel != optlevel:3687v.remove_index()3688else:3689kw["optlevel"] = cur_optlevel36903691# create the index3692if not v.is_indexed:3693if v.type.startswith("complex"):3694raise TypeError(3695"Columns containing complex values can be stored but "3696"cannot be indexed when using table format. Either use "3697"fixed format, set index=False, or do not include "3698"the columns containing complex values to "3699"data_columns when initializing the table."3700)3701v.create_index(**kw)3702elif c in self.non_index_axes[0][1]:3703# GH 281563704raise AttributeError(3705f"column {c} is not a data_column.\n"3706f"In order to read column {c} you must reload the dataframe \n"3707f"into HDFStore and include {c} with the data_columns argument."3708)37093710def _read_axes(3711self, where, start: int | None = None, stop: int | None = None3712) -> list[tuple[ArrayLike, ArrayLike]]:3713"""3714Create the axes sniffed from the table.37153716Parameters3717----------3718where : ???3719start : int or None, default None3720stop : int or None, default None37213722Returns3723-------3724List[Tuple[index_values, column_values]]3725"""3726# create the selection3727selection = Selection(self, where=where, start=start, stop=stop)3728values = selection.select()37293730results = []3731# convert the data3732for a in self.axes:3733a.set_info(self.info)3734res = a.convert(3735values,3736nan_rep=self.nan_rep,3737encoding=self.encoding,3738errors=self.errors,3739)3740results.append(res)37413742return results37433744@classmethod3745def get_object(cls, obj, transposed: bool):3746"""return the data for this obj"""3747return obj37483749def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):3750"""3751take the input data_columns and min_itemize and create a data3752columns spec3753"""3754if not len(non_index_axes):3755return []37563757axis, axis_labels = non_index_axes[0]3758info = self.info.get(axis, {})3759if info.get("type") == "MultiIndex" and data_columns:3760raise ValueError(3761f"cannot use a multi-index on axis [{axis}] with "3762f"data_columns {data_columns}"3763)37643765# evaluate the passed data_columns, True == use all columns3766# take only valid axis labels3767if data_columns is True:3768data_columns = list(axis_labels)3769elif data_columns is None:3770data_columns = []37713772# if min_itemsize is a dict, add the keys (exclude 'values')3773if isinstance(min_itemsize, dict):3774existing_data_columns = set(data_columns)3775data_columns = list(data_columns) # ensure we do not modify3776data_columns.extend(3777[3778k3779for k in min_itemsize.keys()3780if k != "values" and k not in existing_data_columns3781]3782)37833784# return valid columns in the order of our axis3785return [c for c in data_columns if c in axis_labels]37863787def _create_axes(3788self,3789axes,3790obj: DataFrame,3791validate: bool = True,3792nan_rep=None,3793data_columns=None,3794min_itemsize=None,3795):3796"""3797Create and return the axes.37983799Parameters3800----------3801axes: list or None3802The names or numbers of the axes to create.3803obj : DataFrame3804The object to create axes on.3805validate: bool, default True3806Whether to validate the obj against an existing object already written.3807nan_rep :3808A value to use for string column nan_rep.3809data_columns : List[str], True, or None, default None3810Specify the columns that we want to create to allow indexing on.38113812* True : Use all available columns.3813* None : Use no columns.3814* List[str] : Use the specified columns.38153816min_itemsize: Dict[str, int] or None, default None3817The min itemsize for a column in bytes.3818"""3819if not isinstance(obj, DataFrame):3820group = self.group._v_name3821raise TypeError(3822f"cannot properly create the storer for: [group->{group},"3823f"value->{type(obj)}]"3824)38253826# set the default axes if needed3827if axes is None:3828axes = [0]38293830# map axes to numbers3831axes = [obj._get_axis_number(a) for a in axes]38323833# do we have an existing table (if so, use its axes & data_columns)3834if self.infer_axes():3835table_exists = True3836axes = [a.axis for a in self.index_axes]3837data_columns = list(self.data_columns)3838nan_rep = self.nan_rep3839# TODO: do we always have validate=True here?3840else:3841table_exists = False38423843new_info = self.info38443845assert self.ndim == 2 # with next check, we must have len(axes) == 13846# currently support on ndim-1 axes3847if len(axes) != self.ndim - 1:3848raise ValueError(3849"currently only support ndim-1 indexers in an AppendableTable"3850)38513852# create according to the new data3853new_non_index_axes: list = []38543855# nan_representation3856if nan_rep is None:3857nan_rep = "nan"38583859# We construct the non-index-axis first, since that alters new_info3860idx = [x for x in [0, 1] if x not in axes][0]38613862a = obj.axes[idx]3863# we might be able to change the axes on the appending data if necessary3864append_axis = list(a)3865if table_exists:3866indexer = len(new_non_index_axes) # i.e. 03867exist_axis = self.non_index_axes[indexer][1]3868if not array_equivalent(np.array(append_axis), np.array(exist_axis)):38693870# ahah! -> reindex3871if array_equivalent(3872np.array(sorted(append_axis)), np.array(sorted(exist_axis))3873):3874append_axis = exist_axis38753876# the non_index_axes info3877info = new_info.setdefault(idx, {})3878info["names"] = list(a.names)3879info["type"] = type(a).__name__38803881new_non_index_axes.append((idx, append_axis))38823883# Now we can construct our new index axis3884idx = axes[0]3885a = obj.axes[idx]3886axis_name = obj._get_axis_name(idx)3887new_index = _convert_index(axis_name, a, self.encoding, self.errors)3888new_index.axis = idx38893890# Because we are always 2D, there is only one new_index, so3891# we know it will have pos=03892new_index.set_pos(0)3893new_index.update_info(new_info)3894new_index.maybe_set_size(min_itemsize) # check for column conflicts38953896new_index_axes = [new_index]3897j = len(new_index_axes) # i.e. 13898assert j == 138993900# reindex by our non_index_axes & compute data_columns3901assert len(new_non_index_axes) == 13902for a in new_non_index_axes:3903obj = _reindex_axis(obj, a[0], a[1])39043905transposed = new_index.axis == 139063907# figure out data_columns and get out blocks3908data_columns = self.validate_data_columns(3909data_columns, min_itemsize, new_non_index_axes3910)39113912frame = self.get_object(obj, transposed)._consolidate()39133914blocks, blk_items = self._get_blocks_and_items(3915frame, table_exists, new_non_index_axes, self.values_axes, data_columns3916)39173918# add my values3919vaxes = []3920for i, (blk, b_items) in enumerate(zip(blocks, blk_items)):39213922# shape of the data column are the indexable axes3923klass = DataCol3924name = None39253926# we have a data_column3927if data_columns and len(b_items) == 1 and b_items[0] in data_columns:3928klass = DataIndexableCol3929name = b_items[0]3930if not (name is None or isinstance(name, str)):3931# TODO: should the message here be more specifically non-str?3932raise ValueError("cannot have non-object label DataIndexableCol")39333934# make sure that we match up the existing columns3935# if we have an existing table3936existing_col: DataCol | None39373938if table_exists and validate:3939try:3940existing_col = self.values_axes[i]3941except (IndexError, KeyError) as err:3942raise ValueError(3943f"Incompatible appended table [{blocks}]"3944f"with existing table [{self.values_axes}]"3945) from err3946else:3947existing_col = None39483949new_name = name or f"values_block_{i}"3950data_converted = _maybe_convert_for_string_atom(3951new_name,3952blk.values,3953existing_col=existing_col,3954min_itemsize=min_itemsize,3955nan_rep=nan_rep,3956encoding=self.encoding,3957errors=self.errors,3958columns=b_items,3959)3960adj_name = _maybe_adjust_name(new_name, self.version)39613962typ = klass._get_atom(data_converted)3963kind = _dtype_to_kind(data_converted.dtype.name)3964tz = None3965if getattr(data_converted, "tz", None) is not None:3966tz = _get_tz(data_converted.tz)39673968meta = metadata = ordered = None3969if is_categorical_dtype(data_converted.dtype):3970ordered = data_converted.ordered3971meta = "category"3972metadata = np.array(data_converted.categories, copy=False).ravel()39733974data, dtype_name = _get_data_and_dtype_name(data_converted)39753976col = klass(3977name=adj_name,3978cname=new_name,3979values=list(b_items),3980typ=typ,3981pos=j,3982kind=kind,3983tz=tz,3984ordered=ordered,3985meta=meta,3986metadata=metadata,3987dtype=dtype_name,3988data=data,3989)3990col.update_info(new_info)39913992vaxes.append(col)39933994j += 139953996dcs = [col.name for col in vaxes if col.is_data_indexable]39973998new_table = type(self)(3999parent=self.parent,4000group=self.group,4001encoding=self.encoding,4002errors=self.errors,4003index_axes=new_index_axes,4004non_index_axes=new_non_index_axes,4005values_axes=vaxes,4006data_columns=dcs,4007info=new_info,4008nan_rep=nan_rep,4009)4010if hasattr(self, "levels"):4011# TODO: get this into constructor, only for appropriate subclass4012new_table.levels = self.levels40134014new_table.validate_min_itemsize(min_itemsize)40154016if validate and table_exists:4017new_table.validate(self)40184019return new_table40204021@staticmethod4022def _get_blocks_and_items(4023frame: DataFrame,4024table_exists: bool,4025new_non_index_axes,4026values_axes,4027data_columns,4028):4029# Helper to clarify non-state-altering parts of _create_axes40304031# TODO(ArrayManager) HDFStore relies on accessing the blocks4032if isinstance(frame._mgr, ArrayManager):4033frame = frame._as_manager("block")40344035def get_blk_items(mgr):4036return [mgr.items.take(blk.mgr_locs) for blk in mgr.blocks]40374038mgr = frame._mgr4039mgr = cast(BlockManager, mgr)4040blocks: list[Block] = list(mgr.blocks)4041blk_items: list[Index] = get_blk_items(mgr)40424043if len(data_columns):4044axis, axis_labels = new_non_index_axes[0]4045new_labels = Index(axis_labels).difference(Index(data_columns))4046mgr = frame.reindex(new_labels, axis=axis)._mgr40474048# error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has no4049# attribute "blocks"4050blocks = list(mgr.blocks) # type: ignore[union-attr]4051blk_items = get_blk_items(mgr)4052for c in data_columns:4053mgr = frame.reindex([c], axis=axis)._mgr4054# error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has4055# no attribute "blocks"4056blocks.extend(mgr.blocks) # type: ignore[union-attr]4057blk_items.extend(get_blk_items(mgr))40584059# reorder the blocks in the same order as the existing table if we can4060if table_exists:4061by_items = {4062tuple(b_items.tolist()): (b, b_items)4063for b, b_items in zip(blocks, blk_items)4064}4065new_blocks: list[Block] = []4066new_blk_items = []4067for ea in values_axes:4068items = tuple(ea.values)4069try:4070b, b_items = by_items.pop(items)4071new_blocks.append(b)4072new_blk_items.append(b_items)4073except (IndexError, KeyError) as err:4074jitems = ",".join([pprint_thing(item) for item in items])4075raise ValueError(4076f"cannot match existing table structure for [{jitems}] "4077"on appending data"4078) from err4079blocks = new_blocks4080blk_items = new_blk_items40814082return blocks, blk_items40834084def process_axes(self, obj, selection: Selection, columns=None):4085"""process axes filters"""4086# make a copy to avoid side effects4087if columns is not None:4088columns = list(columns)40894090# make sure to include levels if we have them4091if columns is not None and self.is_multi_index:4092assert isinstance(self.levels, list) # assured by is_multi_index4093for n in self.levels:4094if n not in columns:4095columns.insert(0, n)40964097# reorder by any non_index_axes & limit to the select columns4098for axis, labels in self.non_index_axes:4099obj = _reindex_axis(obj, axis, labels, columns)41004101# apply the selection filters (but keep in the same order)4102if selection.filter is not None:4103for field, op, filt in selection.filter.format():41044105def process_filter(field, filt):41064107for axis_name in obj._AXIS_ORDERS:4108axis_number = obj._get_axis_number(axis_name)4109axis_values = obj._get_axis(axis_name)4110assert axis_number is not None41114112# see if the field is the name of an axis4113if field == axis_name:41144115# if we have a multi-index, then need to include4116# the levels4117if self.is_multi_index:4118filt = filt.union(Index(self.levels))41194120takers = op(axis_values, filt)4121return obj.loc(axis=axis_number)[takers]41224123# this might be the name of a file IN an axis4124elif field in axis_values:41254126# we need to filter on this dimension4127values = ensure_index(getattr(obj, field).values)4128filt = ensure_index(filt)41294130# hack until we support reversed dim flags4131if isinstance(obj, DataFrame):4132axis_number = 1 - axis_number4133takers = op(values, filt)4134return obj.loc(axis=axis_number)[takers]41354136raise ValueError(f"cannot find the field [{field}] for filtering!")41374138obj = process_filter(field, filt)41394140return obj41414142def create_description(4143self,4144complib,4145complevel: int | None,4146fletcher32: bool,4147expectedrows: int | None,4148) -> dict[str, Any]:4149"""create the description of the table from the axes & values"""4150# provided expected rows if its passed4151if expectedrows is None:4152expectedrows = max(self.nrows_expected, 10000)41534154d = {"name": "table", "expectedrows": expectedrows}41554156# description from the axes & values4157d["description"] = {a.cname: a.typ for a in self.axes}41584159if complib:4160if complevel is None:4161complevel = self._complevel or 94162filters = _tables().Filters(4163complevel=complevel,4164complib=complib,4165fletcher32=fletcher32 or self._fletcher32,4166)4167d["filters"] = filters4168elif self._filters is not None:4169d["filters"] = self._filters41704171return d41724173def read_coordinates(4174self, where=None, start: int | None = None, stop: int | None = None4175):4176"""4177select coordinates (row numbers) from a table; return the4178coordinates object4179"""4180# validate the version4181self.validate_version(where)41824183# infer the data kind4184if not self.infer_axes():4185return False41864187# create the selection4188selection = Selection(self, where=where, start=start, stop=stop)4189coords = selection.select_coords()4190if selection.filter is not None:4191for field, op, filt in selection.filter.format():4192data = self.read_column(4193field, start=coords.min(), stop=coords.max() + 14194)4195coords = coords[op(data.iloc[coords - coords.min()], filt).values]41964197return Index(coords)41984199def read_column(4200self,4201column: str,4202where=None,4203start: int | None = None,4204stop: int | None = None,4205):4206"""4207return a single column from the table, generally only indexables4208are interesting4209"""4210# validate the version4211self.validate_version()42124213# infer the data kind4214if not self.infer_axes():4215return False42164217if where is not None:4218raise TypeError("read_column does not currently accept a where clause")42194220# find the axes4221for a in self.axes:4222if column == a.name:4223if not a.is_data_indexable:4224raise ValueError(4225f"column [{column}] can not be extracted individually; "4226"it is not data indexable"4227)42284229# column must be an indexable or a data column4230c = getattr(self.table.cols, column)4231a.set_info(self.info)4232col_values = a.convert(4233c[start:stop],4234nan_rep=self.nan_rep,4235encoding=self.encoding,4236errors=self.errors,4237)4238return Series(_set_tz(col_values[1], a.tz), name=column)42394240raise KeyError(f"column [{column}] not found in the table")424142424243class WORMTable(Table):4244"""4245a write-once read-many table: this format DOES NOT ALLOW appending to a4246table. writing is a one-time operation the data are stored in a format4247that allows for searching the data on disk4248"""42494250table_type = "worm"42514252def read(4253self,4254where=None,4255columns=None,4256start: int | None = None,4257stop: int | None = None,4258):4259"""4260read the indices and the indexing array, calculate offset rows and return4261"""4262raise NotImplementedError("WORMTable needs to implement read")42634264def write(self, **kwargs):4265"""4266write in a format that we can search later on (but cannot append4267to): write out the indices and the values using _write_array4268(e.g. a CArray) create an indexing table so that we can search4269"""4270raise NotImplementedError("WORMTable needs to implement write")427142724273class AppendableTable(Table):4274"""support the new appendable table formats"""42754276table_type = "appendable"42774278def write(4279self,4280obj,4281axes=None,4282append=False,4283complib=None,4284complevel=None,4285fletcher32=None,4286min_itemsize=None,4287chunksize=None,4288expectedrows=None,4289dropna=False,4290nan_rep=None,4291data_columns=None,4292track_times=True,4293):4294if not append and self.is_exists:4295self._handle.remove_node(self.group, "table")42964297# create the axes4298table = self._create_axes(4299axes=axes,4300obj=obj,4301validate=append,4302min_itemsize=min_itemsize,4303nan_rep=nan_rep,4304data_columns=data_columns,4305)43064307for a in table.axes:4308a.validate_names()43094310if not table.is_exists:43114312# create the table4313options = table.create_description(4314complib=complib,4315complevel=complevel,4316fletcher32=fletcher32,4317expectedrows=expectedrows,4318)43194320# set the table attributes4321table.set_attrs()43224323options["track_times"] = track_times43244325# create the table4326table._handle.create_table(table.group, **options)43274328# update my info4329table.attrs.info = table.info43304331# validate the axes and set the kinds4332for a in table.axes:4333a.validate_and_set(table, append)43344335# add the rows4336table.write_data(chunksize, dropna=dropna)43374338def write_data(self, chunksize: int | None, dropna: bool = False):4339"""4340we form the data into a 2-d including indexes,values,mask write chunk-by-chunk4341"""4342names = self.dtype.names4343nrows = self.nrows_expected43444345# if dropna==True, then drop ALL nan rows4346masks = []4347if dropna:4348for a in self.values_axes:4349# figure the mask: only do if we can successfully process this4350# column, otherwise ignore the mask4351mask = isna(a.data).all(axis=0)4352if isinstance(mask, np.ndarray):4353masks.append(mask.astype("u1", copy=False))43544355# consolidate masks4356if len(masks):4357mask = masks[0]4358for m in masks[1:]:4359mask = mask & m4360mask = mask.ravel()4361else:4362mask = None43634364# broadcast the indexes if needed4365indexes = [a.cvalues for a in self.index_axes]4366nindexes = len(indexes)4367assert nindexes == 1, nindexes # ensures we dont need to broadcast43684369# transpose the values so first dimension is last4370# reshape the values if needed4371values = [a.take_data() for a in self.values_axes]4372values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1)) for v in values]4373bvalues = []4374for i, v in enumerate(values):4375new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape4376bvalues.append(values[i].reshape(new_shape))43774378# write the chunks4379if chunksize is None:4380chunksize = 10000043814382rows = np.empty(min(chunksize, nrows), dtype=self.dtype)4383chunks = nrows // chunksize + 14384for i in range(chunks):4385start_i = i * chunksize4386end_i = min((i + 1) * chunksize, nrows)4387if start_i >= end_i:4388break43894390self.write_data_chunk(4391rows,4392indexes=[a[start_i:end_i] for a in indexes],4393mask=mask[start_i:end_i] if mask is not None else None,4394values=[v[start_i:end_i] for v in bvalues],4395)43964397def write_data_chunk(4398self,4399rows: np.ndarray,4400indexes: list[np.ndarray],4401mask: np.ndarray | None,4402values: list[np.ndarray],4403):4404"""4405Parameters4406----------4407rows : an empty memory space where we are putting the chunk4408indexes : an array of the indexes4409mask : an array of the masks4410values : an array of the values4411"""4412# 0 len4413for v in values:4414if not np.prod(v.shape):4415return44164417nrows = indexes[0].shape[0]4418if nrows != len(rows):4419rows = np.empty(nrows, dtype=self.dtype)4420names = self.dtype.names4421nindexes = len(indexes)44224423# indexes4424for i, idx in enumerate(indexes):4425rows[names[i]] = idx44264427# values4428for i, v in enumerate(values):4429rows[names[i + nindexes]] = v44304431# mask4432if mask is not None:4433m = ~mask.ravel().astype(bool, copy=False)4434if not m.all():4435rows = rows[m]44364437if len(rows):4438self.table.append(rows)4439self.table.flush()44404441def delete(self, where=None, start: int | None = None, stop: int | None = None):44424443# delete all rows (and return the nrows)4444if where is None or not len(where):4445if start is None and stop is None:4446nrows = self.nrows4447self._handle.remove_node(self.group, recursive=True)4448else:4449# pytables<3.0 would remove a single row with stop=None4450if stop is None:4451stop = self.nrows4452nrows = self.table.remove_rows(start=start, stop=stop)4453self.table.flush()4454return nrows44554456# infer the data kind4457if not self.infer_axes():4458return None44594460# create the selection4461table = self.table4462selection = Selection(self, where, start=start, stop=stop)4463values = selection.select_coords()44644465# delete the rows in reverse order4466sorted_series = Series(values).sort_values()4467ln = len(sorted_series)44684469if ln:44704471# construct groups of consecutive rows4472diff = sorted_series.diff()4473groups = list(diff[diff > 1].index)44744475# 1 group4476if not len(groups):4477groups = [0]44784479# final element4480if groups[-1] != ln:4481groups.append(ln)44824483# initial element4484if groups[0] != 0:4485groups.insert(0, 0)44864487# we must remove in reverse order!4488pg = groups.pop()4489for g in reversed(groups):4490rows = sorted_series.take(range(g, pg))4491table.remove_rows(4492start=rows[rows.index[0]], stop=rows[rows.index[-1]] + 14493)4494pg = g44954496self.table.flush()44974498# return the number of rows removed4499return ln450045014502class AppendableFrameTable(AppendableTable):4503"""support the new appendable table formats"""45044505pandas_kind = "frame_table"4506table_type = "appendable_frame"4507ndim = 24508obj_type: type[DataFrame | Series] = DataFrame45094510@property4511def is_transposed(self) -> bool:4512return self.index_axes[0].axis == 145134514@classmethod4515def get_object(cls, obj, transposed: bool):4516"""these are written transposed"""4517if transposed:4518obj = obj.T4519return obj45204521def read(4522self,4523where=None,4524columns=None,4525start: int | None = None,4526stop: int | None = None,4527):45284529# validate the version4530self.validate_version(where)45314532# infer the data kind4533if not self.infer_axes():4534return None45354536result = self._read_axes(where=where, start=start, stop=stop)45374538info = (4539self.info.get(self.non_index_axes[0][0], {})4540if len(self.non_index_axes)4541else {}4542)45434544inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]]4545assert len(inds) == 14546ind = inds[0]45474548index = result[ind][0]45494550frames = []4551for i, a in enumerate(self.axes):4552if a not in self.values_axes:4553continue4554index_vals, cvalues = result[i]45554556# we could have a multi-index constructor here4557# ensure_index doesn't recognized our list-of-tuples here4558if info.get("type") != "MultiIndex":4559cols = Index(index_vals)4560else:4561cols = MultiIndex.from_tuples(index_vals)45624563names = info.get("names")4564if names is not None:4565cols.set_names(names, inplace=True)45664567if self.is_transposed:4568values = cvalues4569index_ = cols4570cols_ = Index(index, name=getattr(index, "name", None))4571else:4572values = cvalues.T4573index_ = Index(index, name=getattr(index, "name", None))4574cols_ = cols45754576# if we have a DataIndexableCol, its shape will only be 1 dim4577if values.ndim == 1 and isinstance(values, np.ndarray):4578values = values.reshape((1, values.shape[0]))45794580if isinstance(values, np.ndarray):4581df = DataFrame(values.T, columns=cols_, index=index_)4582elif isinstance(values, Index):4583df = DataFrame(values, columns=cols_, index=index_)4584else:4585# Categorical4586df = DataFrame._from_arrays([values], columns=cols_, index=index_)4587assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)4588frames.append(df)45894590if len(frames) == 1:4591df = frames[0]4592else:4593df = concat(frames, axis=1)45944595selection = Selection(self, where=where, start=start, stop=stop)4596# apply the selection filters & axis orderings4597df = self.process_axes(df, selection=selection, columns=columns)45984599return df460046014602class AppendableSeriesTable(AppendableFrameTable):4603"""support the new appendable table formats"""46044605pandas_kind = "series_table"4606table_type = "appendable_series"4607ndim = 24608obj_type = Series46094610@property4611def is_transposed(self) -> bool:4612return False46134614@classmethod4615def get_object(cls, obj, transposed: bool):4616return obj46174618def write(self, obj, data_columns=None, **kwargs):4619"""we are going to write this as a frame table"""4620if not isinstance(obj, DataFrame):4621name = obj.name or "values"4622obj = obj.to_frame(name)4623return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs)46244625def read(4626self,4627where=None,4628columns=None,4629start: int | None = None,4630stop: int | None = None,4631) -> Series:46324633is_multi_index = self.is_multi_index4634if columns is not None and is_multi_index:4635assert isinstance(self.levels, list) # needed for mypy4636for n in self.levels:4637if n not in columns:4638columns.insert(0, n)4639s = super().read(where=where, columns=columns, start=start, stop=stop)4640if is_multi_index:4641s.set_index(self.levels, inplace=True)46424643s = s.iloc[:, 0]46444645# remove the default name4646if s.name == "values":4647s.name = None4648return s464946504651class AppendableMultiSeriesTable(AppendableSeriesTable):4652"""support the new appendable table formats"""46534654pandas_kind = "series_table"4655table_type = "appendable_multiseries"46564657def write(self, obj, **kwargs):4658"""we are going to write this as a frame table"""4659name = obj.name or "values"4660newobj, self.levels = self.validate_multiindex(obj)4661assert isinstance(self.levels, list) # for mypy4662cols = list(self.levels)4663cols.append(name)4664newobj.columns = Index(cols)4665return super().write(obj=newobj, **kwargs)466646674668class GenericTable(AppendableFrameTable):4669"""a table that read/writes the generic pytables table format"""46704671pandas_kind = "frame_table"4672table_type = "generic_table"4673ndim = 24674obj_type = DataFrame4675levels: list[Hashable]46764677@property4678def pandas_type(self) -> str:4679return self.pandas_kind46804681@property4682def storable(self):4683return getattr(self.group, "table", None) or self.group46844685def get_attrs(self):4686"""retrieve our attributes"""4687self.non_index_axes = []4688self.nan_rep = None4689self.levels = []46904691self.index_axes = [a for a in self.indexables if a.is_an_indexable]4692self.values_axes = [a for a in self.indexables if not a.is_an_indexable]4693self.data_columns = [a.name for a in self.values_axes]46944695@cache_readonly4696def indexables(self):4697"""create the indexables from the table description"""4698d = self.description46994700# TODO: can we get a typ for this? AFAICT it is the only place4701# where we aren't passing one4702# the index columns is just a simple index4703md = self.read_metadata("index")4704meta = "category" if md is not None else None4705index_col = GenericIndexCol(4706name="index", axis=0, table=self.table, meta=meta, metadata=md4707)47084709_indexables: list[GenericIndexCol | GenericDataIndexableCol] = [index_col]47104711for i, n in enumerate(d._v_names):4712assert isinstance(n, str)47134714atom = getattr(d, n)4715md = self.read_metadata(n)4716meta = "category" if md is not None else None4717dc = GenericDataIndexableCol(4718name=n,4719pos=i,4720values=[n],4721typ=atom,4722table=self.table,4723meta=meta,4724metadata=md,4725)4726_indexables.append(dc)47274728return _indexables47294730def write(self, **kwargs):4731raise NotImplementedError("cannot write on an generic table")473247334734class AppendableMultiFrameTable(AppendableFrameTable):4735"""a frame with a multi-index"""47364737table_type = "appendable_multiframe"4738obj_type = DataFrame4739ndim = 24740_re_levels = re.compile(r"^level_\d+$")47414742@property4743def table_type_short(self) -> str:4744return "appendable_multi"47454746def write(self, obj, data_columns=None, **kwargs):4747if data_columns is None:4748data_columns = []4749elif data_columns is True:4750data_columns = obj.columns.tolist()4751obj, self.levels = self.validate_multiindex(obj)4752assert isinstance(self.levels, list) # for mypy4753for n in self.levels:4754if n not in data_columns:4755data_columns.insert(0, n)4756return super().write(obj=obj, data_columns=data_columns, **kwargs)47574758def read(4759self,4760where=None,4761columns=None,4762start: int | None = None,4763stop: int | None = None,4764):47654766df = super().read(where=where, columns=columns, start=start, stop=stop)4767df = df.set_index(self.levels)47684769# remove names for 'level_%d'4770df.index = df.index.set_names(4771[None if self._re_levels.search(name) else name for name in df.index.names]4772)47734774return df477547764777def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataFrame:4778ax = obj._get_axis(axis)4779labels = ensure_index(labels)47804781# try not to reindex even if other is provided4782# if it equals our current index4783if other is not None:4784other = ensure_index(other)4785if (other is None or labels.equals(other)) and labels.equals(ax):4786return obj47874788labels = ensure_index(labels.unique())4789if other is not None:4790labels = ensure_index(other.unique()).intersection(labels, sort=False)4791if not labels.equals(ax):4792slicer: list[slice | Index] = [slice(None, None)] * obj.ndim4793slicer[axis] = labels4794obj = obj.loc[tuple(slicer)]4795return obj479647974798# tz to/from coercion479948004801def _get_tz(tz: tzinfo) -> str | tzinfo:4802"""for a tz-aware type, return an encoded zone"""4803zone = timezones.get_timezone(tz)4804return zone480548064807def _set_tz(4808values: np.ndarray | Index,4809tz: str | tzinfo | None,4810coerce: bool = False,4811) -> np.ndarray | DatetimeIndex:4812"""4813coerce the values to a DatetimeIndex if tz is set4814preserve the input shape if possible48154816Parameters4817----------4818values : ndarray or Index4819tz : str or tzinfo4820coerce : if we do not have a passed timezone, coerce to M8[ns] ndarray4821"""4822if isinstance(values, DatetimeIndex):4823# If values is tzaware, the tz gets dropped in the values.ravel()4824# call below (which returns an ndarray). So we are only non-lossy4825# if `tz` matches `values.tz`.4826assert values.tz is None or values.tz == tz48274828if tz is not None:4829if isinstance(values, DatetimeIndex):4830name = values.name4831values = values.asi84832else:4833name = None4834values = values.ravel()48354836tz = _ensure_decoded(tz)4837values = DatetimeIndex(values, name=name)4838values = values.tz_localize("UTC").tz_convert(tz)4839elif coerce:4840values = np.asarray(values, dtype="M8[ns]")48414842# error: Incompatible return value type (got "Union[ndarray, Index]",4843# expected "Union[ndarray, DatetimeIndex]")4844return values # type: ignore[return-value]484548464847def _convert_index(name: str, index: Index, encoding: str, errors: str) -> IndexCol:4848assert isinstance(name, str)48494850index_name = index.name4851# error: Argument 1 to "_get_data_and_dtype_name" has incompatible type "Index";4852# expected "Union[ExtensionArray, ndarray]"4853converted, dtype_name = _get_data_and_dtype_name(index) # type: ignore[arg-type]4854kind = _dtype_to_kind(dtype_name)4855atom = DataIndexableCol._get_atom(converted)48564857if isinstance(index, Int64Index) or needs_i8_conversion(index.dtype):4858# Includes Int64Index, RangeIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex,4859# in which case "kind" is "integer", "integer", "datetime64",4860# "timedelta64", and "integer", respectively.4861return IndexCol(4862name,4863values=converted,4864kind=kind,4865typ=atom,4866freq=getattr(index, "freq", None),4867tz=getattr(index, "tz", None),4868index_name=index_name,4869)48704871if isinstance(index, MultiIndex):4872raise TypeError("MultiIndex not supported here!")48734874inferred_type = lib.infer_dtype(index, skipna=False)4875# we won't get inferred_type of "datetime64" or "timedelta64" as these4876# would go through the DatetimeIndex/TimedeltaIndex paths above48774878values = np.asarray(index)48794880if inferred_type == "date":4881converted = np.asarray([v.toordinal() for v in values], dtype=np.int32)4882return IndexCol(4883name, converted, "date", _tables().Time32Col(), index_name=index_name4884)4885elif inferred_type == "string":48864887converted = _convert_string_array(values, encoding, errors)4888itemsize = converted.dtype.itemsize4889return IndexCol(4890name,4891converted,4892"string",4893_tables().StringCol(itemsize),4894index_name=index_name,4895)48964897elif inferred_type in ["integer", "floating"]:4898return IndexCol(4899name, values=converted, kind=kind, typ=atom, index_name=index_name4900)4901else:4902assert isinstance(converted, np.ndarray) and converted.dtype == object4903assert kind == "object", kind4904atom = _tables().ObjectAtom()4905return IndexCol(name, converted, kind, atom, index_name=index_name)490649074908def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index:4909index: Index | np.ndarray49104911if kind == "datetime64":4912index = DatetimeIndex(data)4913elif kind == "timedelta64":4914index = TimedeltaIndex(data)4915elif kind == "date":4916try:4917index = np.asarray([date.fromordinal(v) for v in data], dtype=object)4918except (ValueError):4919index = np.asarray([date.fromtimestamp(v) for v in data], dtype=object)4920elif kind in ("integer", "float"):4921index = np.asarray(data)4922elif kind in ("string"):4923index = _unconvert_string_array(4924data, nan_rep=None, encoding=encoding, errors=errors4925)4926elif kind == "object":4927index = np.asarray(data[0])4928else: # pragma: no cover4929raise ValueError(f"unrecognized index type {kind}")4930return index493149324933def _maybe_convert_for_string_atom(4934name: str,4935bvalues: ArrayLike,4936existing_col,4937min_itemsize,4938nan_rep,4939encoding,4940errors,4941columns: list[str],4942):49434944if bvalues.dtype != object:4945return bvalues49464947bvalues = cast(np.ndarray, bvalues)49484949dtype_name = bvalues.dtype.name4950inferred_type = lib.infer_dtype(bvalues, skipna=False)49514952if inferred_type == "date":4953raise TypeError("[date] is not implemented as a table column")4954elif inferred_type == "datetime":4955# after GH#82604956# this only would be hit for a multi-timezone dtype which is an error4957raise TypeError(4958"too many timezones in this block, create separate data columns"4959)49604961elif not (inferred_type == "string" or dtype_name == "object"):4962return bvalues49634964mask = isna(bvalues)4965data = bvalues.copy()4966data[mask] = nan_rep49674968# see if we have a valid string type4969inferred_type = lib.infer_dtype(data, skipna=False)4970if inferred_type != "string":49714972# we cannot serialize this data, so report an exception on a column4973# by column basis49744975# expected behaviour:4976# search block for a non-string object column by column4977for i in range(data.shape[0]):4978col = data[i]4979inferred_type = lib.infer_dtype(col, skipna=False)4980if inferred_type != "string":4981error_column_label = columns[i] if len(columns) > i else f"No.{i}"4982raise TypeError(4983f"Cannot serialize the column [{error_column_label}]\n"4984f"because its data contents are not [string] but "4985f"[{inferred_type}] object dtype"4986)49874988# itemsize is the maximum length of a string (along any dimension)49894990data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape)4991itemsize = data_converted.itemsize49924993# specified min_itemsize?4994if isinstance(min_itemsize, dict):4995min_itemsize = int(min_itemsize.get(name) or min_itemsize.get("values") or 0)4996itemsize = max(min_itemsize or 0, itemsize)49974998# check for column in the values conflicts4999if existing_col is not None:5000eci = existing_col.validate_col(itemsize)5001if eci is not None and eci > itemsize:5002itemsize = eci50035004data_converted = data_converted.astype(f"|S{itemsize}", copy=False)5005return data_converted500650075008def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.ndarray:5009"""5010Take a string-like that is object dtype and coerce to a fixed size string type.50115012Parameters5013----------5014data : np.ndarray[object]5015encoding : str5016errors : str5017Handler for encoding errors.50185019Returns5020-------5021np.ndarray[fixed-length-string]5022"""5023# encode if needed5024if len(data):5025data = (5026Series(data.ravel())5027.str.encode(encoding, errors)5028._values.reshape(data.shape)5029)50305031# create the sized dtype5032ensured = ensure_object(data.ravel())5033itemsize = max(1, libwriters.max_len_string_array(ensured))50345035data = np.asarray(data, dtype=f"S{itemsize}")5036return data503750385039def _unconvert_string_array(5040data: np.ndarray, nan_rep, encoding: str, errors: str5041) -> np.ndarray:5042"""5043Inverse of _convert_string_array.50445045Parameters5046----------5047data : np.ndarray[fixed-length-string]5048nan_rep : the storage repr of NaN5049encoding : str5050errors : str5051Handler for encoding errors.50525053Returns5054-------5055np.ndarray[object]5056Decoded data.5057"""5058shape = data.shape5059data = np.asarray(data.ravel(), dtype=object)50605061if len(data):50625063itemsize = libwriters.max_len_string_array(ensure_object(data))5064dtype = f"U{itemsize}"50655066if isinstance(data[0], bytes):5067data = Series(data).str.decode(encoding, errors=errors)._values5068else:5069data = data.astype(dtype, copy=False).astype(object, copy=False)50705071if nan_rep is None:5072nan_rep = "nan"50735074libwriters.string_array_replace_from_nan_rep(data, nan_rep)5075return data.reshape(shape)507650775078def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str):5079assert isinstance(val_kind, str), type(val_kind)5080if _need_convert(val_kind):5081conv = _get_converter(val_kind, encoding, errors)5082values = conv(values)5083return values508450855086def _get_converter(kind: str, encoding: str, errors: str):5087if kind == "datetime64":5088return lambda x: np.asarray(x, dtype="M8[ns]")5089elif kind == "string":5090return lambda x: _unconvert_string_array(5091x, nan_rep=None, encoding=encoding, errors=errors5092)5093else: # pragma: no cover5094raise ValueError(f"invalid kind {kind}")509550965097def _need_convert(kind: str) -> bool:5098if kind in ("datetime64", "string"):5099return True5100return False510151025103def _maybe_adjust_name(name: str, version: Sequence[int]) -> str:5104"""5105Prior to 0.10.1, we named values blocks like: values_block_0 an the5106name values_0, adjust the given name if necessary.51075108Parameters5109----------5110name : str5111version : Tuple[int, int, int]51125113Returns5114-------5115str5116"""5117if isinstance(version, str) or len(version) < 3:5118raise ValueError("Version is incorrect, expected sequence of 3 integers.")51195120if version[0] == 0 and version[1] <= 10 and version[2] == 0:5121m = re.search(r"values_block_(\d+)", name)5122if m:5123grp = m.groups()[0]5124name = f"values_{grp}"5125return name512651275128def _dtype_to_kind(dtype_str: str) -> str:5129"""5130Find the "kind" string describing the given dtype name.5131"""5132dtype_str = _ensure_decoded(dtype_str)51335134if dtype_str.startswith("string") or dtype_str.startswith("bytes"):5135kind = "string"5136elif dtype_str.startswith("float"):5137kind = "float"5138elif dtype_str.startswith("complex"):5139kind = "complex"5140elif dtype_str.startswith("int") or dtype_str.startswith("uint"):5141kind = "integer"5142elif dtype_str.startswith("datetime64"):5143kind = "datetime64"5144elif dtype_str.startswith("timedelta"):5145kind = "timedelta64"5146elif dtype_str.startswith("bool"):5147kind = "bool"5148elif dtype_str.startswith("category"):5149kind = "category"5150elif dtype_str.startswith("period"):5151# We store the `freq` attr so we can restore from integers5152kind = "integer"5153elif dtype_str == "object":5154kind = "object"5155else:5156raise ValueError(f"cannot interpret dtype of [{dtype_str}]")51575158return kind515951605161def _get_data_and_dtype_name(data: ArrayLike):5162"""5163Convert the passed data into a storable form and a dtype string.5164"""5165if isinstance(data, Categorical):5166data = data.codes51675168# For datetime64tz we need to drop the TZ in tests TODO: why?5169dtype_name = data.dtype.name.split("[")[0]51705171if data.dtype.kind in ["m", "M"]:5172data = np.asarray(data.view("i8"))5173# TODO: we used to reshape for the dt64tz case, but no longer5174# doing that doesn't seem to break anything. why?51755176elif isinstance(data, PeriodIndex):5177data = data.asi851785179data = np.asarray(data)5180return data, dtype_name518151825183class Selection:5184"""5185Carries out a selection operation on a tables.Table object.51865187Parameters5188----------5189table : a Table object5190where : list of Terms (or convertible to)5191start, stop: indices to start and/or stop selection51925193"""51945195def __init__(5196self,5197table: Table,5198where=None,5199start: int | None = None,5200stop: int | None = None,5201):5202self.table = table5203self.where = where5204self.start = start5205self.stop = stop5206self.condition = None5207self.filter = None5208self.terms = None5209self.coordinates = None52105211if is_list_like(where):52125213# see if we have a passed coordinate like5214with suppress(ValueError):5215inferred = lib.infer_dtype(where, skipna=False)5216if inferred == "integer" or inferred == "boolean":5217where = np.asarray(where)5218if where.dtype == np.bool_:5219start, stop = self.start, self.stop5220if start is None:5221start = 05222if stop is None:5223stop = self.table.nrows5224self.coordinates = np.arange(start, stop)[where]5225elif issubclass(where.dtype.type, np.integer):5226if (self.start is not None and (where < self.start).any()) or (5227self.stop is not None and (where >= self.stop).any()5228):5229raise ValueError(5230"where must have index locations >= start and < stop"5231)5232self.coordinates = where52335234if self.coordinates is None:52355236self.terms = self.generate(where)52375238# create the numexpr & the filter5239if self.terms is not None:5240self.condition, self.filter = self.terms.evaluate()52415242def generate(self, where):5243"""where can be a : dict,list,tuple,string"""5244if where is None:5245return None52465247q = self.table.queryables()5248try:5249return PyTablesExpr(where, queryables=q, encoding=self.table.encoding)5250except NameError as err:5251# raise a nice message, suggesting that the user should use5252# data_columns5253qkeys = ",".join(q.keys())5254msg = dedent(5255f"""\5256The passed where expression: {where}5257contains an invalid variable reference5258all of the variable references must be a reference to5259an axis (e.g. 'index' or 'columns'), or a data_column5260The currently defined references are: {qkeys}5261"""5262)5263raise ValueError(msg) from err52645265def select(self):5266"""5267generate the selection5268"""5269if self.condition is not None:5270return self.table.table.read_where(5271self.condition.format(), start=self.start, stop=self.stop5272)5273elif self.coordinates is not None:5274return self.table.table.read_coordinates(self.coordinates)5275return self.table.table.read(start=self.start, stop=self.stop)52765277def select_coords(self):5278"""5279generate the selection5280"""5281start, stop = self.start, self.stop5282nrows = self.table.nrows5283if start is None:5284start = 05285elif start < 0:5286start += nrows5287if stop is None:5288stop = nrows5289elif stop < 0:5290stop += nrows52915292if self.condition is not None:5293return self.table.table.get_where_list(5294self.condition.format(), start=start, stop=stop, sort=True5295)5296elif self.coordinates is not None:5297return self.coordinates52985299return np.arange(start, stop)530053015302