1
"""
2
High level interface to PyTables for reading and writing pandas data structures
3
to disk
4
"""
5
from __future__ import annotations
6
7
from contextlib import suppress
8
import copy
9
from datetime import (
10
date,
11
tzinfo,
12
)
13
import itertools
14
import os
15
import re
16
from textwrap import dedent
17
from typing import (
18
TYPE_CHECKING,
19
Any,
20
Callable,
21
Hashable,
22
Literal,
23
Sequence,
24
cast,
25
)
26
import warnings
27
28
import numpy as np
29
30
from pandas._config import (
31
config,
32
get_option,
33
)
34
35
from pandas._libs import (
36
lib,
37
writers as libwriters,
38
)
39
from pandas._libs.tslibs import timezones
40
from pandas._typing import (
41
ArrayLike,
42
DtypeArg,
43
Shape,
44
)
45
from pandas.compat._optional import import_optional_dependency
46
from pandas.compat.pickle_compat import patch_pickle
47
from pandas.errors import PerformanceWarning
48
from pandas.util._decorators import cache_readonly
49
from pandas.util._exceptions import find_stack_level
50
51
from pandas.core.dtypes.common import (
52
ensure_object,
53
is_categorical_dtype,
54
is_complex_dtype,
55
is_datetime64_dtype,
56
is_datetime64tz_dtype,
57
is_extension_array_dtype,
58
is_list_like,
59
is_string_dtype,
60
is_timedelta64_dtype,
61
needs_i8_conversion,
62
)
63
from pandas.core.dtypes.missing import array_equivalent
64
65
from pandas import (
66
DataFrame,
67
DatetimeIndex,
68
Index,
69
MultiIndex,
70
PeriodIndex,
71
Series,
72
TimedeltaIndex,
73
concat,
74
isna,
75
)
76
from pandas.core.api import Int64Index
77
from pandas.core.arrays import (
78
Categorical,
79
DatetimeArray,
80
PeriodArray,
81
)
82
import pandas.core.common as com
83
from pandas.core.computation.pytables import (
84
PyTablesExpr,
85
maybe_expression,
86
)
87
from pandas.core.construction import extract_array
88
from pandas.core.indexes.api import ensure_index
89
from pandas.core.internals import (
90
ArrayManager,
91
BlockManager,
92
)
93
94
from pandas.io.common import stringify_path
95
from pandas.io.formats.printing import (
96
adjoin,
97
pprint_thing,
98
)
99
100
if TYPE_CHECKING:
101
from tables import (
102
Col,
103
File,
104
Node,
105
)
106
107
from pandas.core.internals import Block
108
109
110
# versioning attribute
111
_version = "0.15.2"
112
113
# encoding
114
_default_encoding = "UTF-8"
115
116
117
def _ensure_decoded(s):
118
"""if we have bytes, decode them to unicode"""
119
if isinstance(s, np.bytes_):
120
s = s.decode("UTF-8")
121
return s
122
123
124
def _ensure_encoding(encoding):
125
# set the encoding if we need
126
if encoding is None:
127
encoding = _default_encoding
128
129
return encoding
130
131
132
def _ensure_str(name):
133
"""
134
Ensure that an index / column name is a str (python 3); otherwise they
135
may be np.string dtype. Non-string dtypes are passed through unchanged.
136
137
https://github.com/pandas-dev/pandas/issues/13492
138
"""
139
if isinstance(name, str):
140
name = str(name)
141
return name
142
143
144
Term = PyTablesExpr
145
146
147
def _ensure_term(where, scope_level: int):
148
"""
149
Ensure that the where is a Term or a list of Term.
150
151
This makes sure that we are capturing the scope of variables that are
152
passed; create the terms here with a frame_level=2 (we are 2 levels down)
153
"""
154
# only consider list/tuple here as an ndarray is automatically a coordinate
155
# list
156
level = scope_level + 1
157
if isinstance(where, (list, tuple)):
158
where = [
159
Term(term, scope_level=level + 1) if maybe_expression(term) else term
160
for term in where
161
if term is not None
162
]
163
elif maybe_expression(where):
164
where = Term(where, scope_level=level)
165
return where if where is None or len(where) else None
166
167
168
class PossibleDataLossError(Exception):
169
pass
170
171
172
class ClosedFileError(Exception):
173
pass
174
175
176
class IncompatibilityWarning(Warning):
177
pass
178
179
180
incompatibility_doc = """
181
where criteria is being ignored as this version [%s] is too old (or
182
not-defined), read the file in and write it out to a new file to upgrade (with
183
the copy_to method)
184
"""
185
186
187
class AttributeConflictWarning(Warning):
188
pass
189
190
191
attribute_conflict_doc = """
192
the [%s] attribute of the existing index is [%s] which conflicts with the new
193
[%s], resetting the attribute to None
194
"""
195
196
197
class DuplicateWarning(Warning):
198
pass
199
200
201
duplicate_doc = """
202
duplicate entries in table, taking most recently appended
203
"""
204
205
performance_doc = """
206
your performance may suffer as PyTables will pickle object types that it cannot
207
map directly to c-types [inferred_type->%s,key->%s] [items->%s]
208
"""
209
210
# formats
211
_FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"}
212
213
# axes map
214
_AXES_MAP = {DataFrame: [0]}
215
216
# register our configuration options
217
dropna_doc = """
218
: boolean
219
drop ALL nan rows when appending to a table
220
"""
221
format_doc = """
222
: format
223
default format writing format, if None, then
224
put will default to 'fixed' and append will default to 'table'
225
"""
226
227
with config.config_prefix("io.hdf"):
228
config.register_option("dropna_table", False, dropna_doc, validator=config.is_bool)
229
config.register_option(
230
"default_format",
231
None,
232
format_doc,
233
validator=config.is_one_of_factory(["fixed", "table", None]),
234
)
235
236
# oh the troubles to reduce import time
237
_table_mod = None
238
_table_file_open_policy_is_strict = False
239
240
241
def _tables():
242
global _table_mod
243
global _table_file_open_policy_is_strict
244
if _table_mod is None:
245
import tables
246
247
_table_mod = tables
248
249
# set the file open policy
250
# return the file open policy; this changes as of pytables 3.1
251
# depending on the HDF5 version
252
with suppress(AttributeError):
253
_table_file_open_policy_is_strict = (
254
tables.file._FILE_OPEN_POLICY == "strict"
255
)
256
257
return _table_mod
258
259
260
# interface to/from ###
261
262
263
def to_hdf(
264
path_or_buf,
265
key: str,
266
value: DataFrame | Series,
267
mode: str = "a",
268
complevel: int | None = None,
269
complib: str | None = None,
270
append: bool = False,
271
format: str | None = None,
272
index: bool = True,
273
min_itemsize: int | dict[str, int] | None = None,
274
nan_rep=None,
275
dropna: bool | None = None,
276
data_columns: Literal[True] | list[str] | None = None,
277
errors: str = "strict",
278
encoding: str = "UTF-8",
279
) -> None:
280
"""store this object, close it if we opened it"""
281
if append:
282
f = lambda store: store.append(
283
key,
284
value,
285
format=format,
286
index=index,
287
min_itemsize=min_itemsize,
288
nan_rep=nan_rep,
289
dropna=dropna,
290
data_columns=data_columns,
291
errors=errors,
292
encoding=encoding,
293
)
294
else:
295
# NB: dropna is not passed to `put`
296
f = lambda store: store.put(
297
key,
298
value,
299
format=format,
300
index=index,
301
min_itemsize=min_itemsize,
302
nan_rep=nan_rep,
303
data_columns=data_columns,
304
errors=errors,
305
encoding=encoding,
306
dropna=dropna,
307
)
308
309
path_or_buf = stringify_path(path_or_buf)
310
if isinstance(path_or_buf, str):
311
with HDFStore(
312
path_or_buf, mode=mode, complevel=complevel, complib=complib
313
) as store:
314
f(store)
315
else:
316
f(path_or_buf)
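# Illustrative usage sketch (not part of pandas; the file name and frame are
# hypothetical). This helper is normally reached via DataFrame.to_hdf /
# Series.to_hdf; append=True routes through store.append and implies the
# appendable 'table' format:
#
#   df = pd.DataFrame({"a": [1, 2, 3]})
#   df.to_hdf("store.h5", "df", format="table")
#   df.to_hdf("store.h5", "df", append=True)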
317
318
319
def read_hdf(
320
path_or_buf,
321
key=None,
322
mode: str = "r",
323
errors: str = "strict",
324
where=None,
325
start: int | None = None,
326
stop: int | None = None,
327
columns=None,
328
iterator=False,
329
chunksize: int | None = None,
330
**kwargs,
331
):
332
"""
333
Read from the store, close it if we opened it.
334
335
Retrieve pandas object stored in file, optionally based on where
336
criteria.
337
338
.. warning::
339
340
Pandas uses PyTables for reading and writing HDF5 files, which allows
341
serializing object-dtype data with pickle when using the "fixed" format.
342
Loading pickled data received from untrusted sources can be unsafe.
343
344
See: https://docs.python.org/3/library/pickle.html for more.
345
346
Parameters
347
----------
348
path_or_buf : str, path object, pandas.HDFStore
349
Any valid string path is acceptable. Only supports the local file system,
350
remote URLs and file-like objects are not supported.
351
352
If you want to pass in a path object, pandas accepts any
353
``os.PathLike``.
354
355
Alternatively, pandas accepts an open :class:`pandas.HDFStore` object.
356
357
key : object, optional
358
The group identifier in the store. Can be omitted if the HDF file
359
contains a single pandas object.
360
mode : {'r', 'r+', 'a'}, default 'r'
361
Mode to use when opening the file. Ignored if path_or_buf is a
362
:class:`pandas.HDFStore`. Default is 'r'.
363
errors : str, default 'strict'
364
Specifies how encoding and decoding errors are to be handled.
365
See the errors argument for :func:`open` for a full list
366
of options.
367
where : list, optional
368
A list of Term (or convertible) objects.
369
start : int, optional
370
Row number to start selection.
371
stop : int, optional
372
Row number to stop selection.
373
columns : list, optional
374
A list of columns names to return.
375
iterator : bool, optional
376
Return an iterator object.
377
chunksize : int, optional
378
Number of rows to include in an iteration when using an iterator.
379
**kwargs
380
Additional keyword arguments passed to HDFStore.
381
382
Returns
383
-------
384
item : object
385
The selected object. Return type depends on the object stored.
386
387
See Also
388
--------
389
DataFrame.to_hdf : Write a HDF file from a DataFrame.
390
HDFStore : Low-level access to HDF files.
391
392
Examples
393
--------
394
>>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP
395
>>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP
396
>>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP
397
"""
398
if mode not in ["r", "r+", "a"]:
399
raise ValueError(
400
f"mode {mode} is not allowed while performing a read. "
401
f"Allowed modes are r, r+ and a."
402
)
403
# grab the scope
404
if where is not None:
405
where = _ensure_term(where, scope_level=1)
406
407
if isinstance(path_or_buf, HDFStore):
408
if not path_or_buf.is_open:
409
raise OSError("The HDFStore must be open for reading.")
410
411
store = path_or_buf
412
auto_close = False
413
else:
414
path_or_buf = stringify_path(path_or_buf)
415
if not isinstance(path_or_buf, str):
416
raise NotImplementedError(
417
"Support for generic buffers has not been implemented."
418
)
419
try:
420
exists = os.path.exists(path_or_buf)
421
422
# if filepath is too long
423
except (TypeError, ValueError):
424
exists = False
425
426
if not exists:
427
raise FileNotFoundError(f"File {path_or_buf} does not exist")
428
429
store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)
430
# can't auto open/close if we are using an iterator
431
# so delegate to the iterator
432
auto_close = True
433
434
try:
435
if key is None:
436
groups = store.groups()
437
if len(groups) == 0:
438
raise ValueError(
439
"Dataset(s) incompatible with Pandas data types, "
440
"not table, or no datasets found in HDF5 file."
441
)
442
candidate_only_group = groups[0]
443
444
# For the HDF file to have only one dataset, all other groups
445
# should then be metadata groups for that candidate group. (This
446
# assumes that the groups() method enumerates parent groups
447
# before their children.)
448
for group_to_check in groups[1:]:
449
if not _is_metadata_of(group_to_check, candidate_only_group):
450
raise ValueError(
451
"key must be provided when HDF5 "
452
"file contains multiple datasets."
453
)
454
key = candidate_only_group._v_pathname
455
return store.select(
456
key,
457
where=where,
458
start=start,
459
stop=stop,
460
columns=columns,
461
iterator=iterator,
462
chunksize=chunksize,
463
auto_close=auto_close,
464
)
465
except (ValueError, TypeError, KeyError):
466
if not isinstance(path_or_buf, HDFStore):
467
# if there is an error, close the store if we opened it.
468
with suppress(AttributeError):
469
store.close()
470
471
raise
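# Illustrative usage sketch (not part of pandas; file, key and column names
# are hypothetical). A `where` criterion is only honoured for data written
# in the 'table' format:
#
#   pd.read_hdf("store.h5", "df")
#   pd.read_hdf("store.h5", "df", where="index > 5", columns=["a"])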
472
473
474
def _is_metadata_of(group: Node, parent_group: Node) -> bool:
475
"""Check if a given group is a metadata group for a given parent_group."""
476
if group._v_depth <= parent_group._v_depth:
477
return False
478
479
current = group
480
while current._v_depth > 1:
481
parent = current._v_parent
482
if parent == parent_group and current._v_name == "meta":
483
return True
484
current = current._v_parent
485
return False
486
487
488
class HDFStore:
489
"""
490
Dict-like IO interface for storing pandas objects in PyTables.
491
492
Either Fixed or Table format.
493
494
.. warning::
495
496
Pandas uses PyTables for reading and writing HDF5 files, which allows
497
serializing object-dtype data with pickle when using the "fixed" format.
498
Loading pickled data received from untrusted sources can be unsafe.
499
500
See: https://docs.python.org/3/library/pickle.html for more.
501
502
Parameters
503
----------
504
path : str
505
File path to HDF5 file.
506
mode : {'a', 'w', 'r', 'r+'}, default 'a'
507
508
``'r'``
509
Read-only; no data can be modified.
510
``'w'``
511
Write; a new file is created (an existing file with the same
512
name would be deleted).
513
``'a'``
514
Append; an existing file is opened for reading and writing,
515
and if the file does not exist it is created.
516
``'r+'``
517
It is similar to ``'a'``, but the file must already exist.
518
complevel : int, 0-9, default None
519
Specifies a compression level for data.
520
A value of 0 or None disables compression.
521
complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
522
Specifies the compression library to be used.
523
As of v0.20.2 these additional compressors for Blosc are supported
524
(default if no compressor specified: 'blosc:blosclz'):
525
{'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
526
'blosc:zlib', 'blosc:zstd'}.
527
Specifying a compression library which is not available raises
528
a ValueError.
529
fletcher32 : bool, default False
530
If applying compression use the fletcher32 checksum.
531
**kwargs
532
These parameters will be passed to the PyTables open_file method.
533
534
Examples
535
--------
536
>>> bar = pd.DataFrame(np.random.randn(10, 4))
537
>>> store = pd.HDFStore('test.h5')
538
>>> store['foo'] = bar # write to HDF5
539
>>> bar = store['foo'] # retrieve
540
>>> store.close()
541
542
**Create or load HDF5 file in-memory**
543
544
When passing the `driver` option to the PyTables open_file method through
545
**kwargs, the HDF5 file is loaded or created in-memory and will only be
546
written when closed:
547
548
>>> bar = pd.DataFrame(np.random.randn(10, 4))
549
>>> store = pd.HDFStore('test.h5', driver='H5FD_CORE')
550
>>> store['foo'] = bar
551
>>> store.close() # only now, data is written to disk
552
"""
553
554
_handle: File | None
555
_mode: str
556
_complevel: int
557
_fletcher32: bool
558
559
def __init__(
560
self,
561
path,
562
mode: str = "a",
563
complevel: int | None = None,
564
complib=None,
565
fletcher32: bool = False,
566
**kwargs,
567
):
568
569
if "format" in kwargs:
570
raise ValueError("format is not a defined argument for HDFStore")
571
572
tables = import_optional_dependency("tables")
573
574
if complib is not None and complib not in tables.filters.all_complibs:
575
raise ValueError(
576
f"complib only supports {tables.filters.all_complibs} compression."
577
)
578
579
if complib is None and complevel is not None:
580
complib = tables.filters.default_complib
581
582
self._path = stringify_path(path)
583
if mode is None:
584
mode = "a"
585
self._mode = mode
586
self._handle = None
587
self._complevel = complevel if complevel else 0
588
self._complib = complib
589
self._fletcher32 = fletcher32
590
self._filters = None
591
self.open(mode=mode, **kwargs)
592
593
def __fspath__(self):
594
return self._path
595
596
@property
597
def root(self):
598
"""return the root node"""
599
self._check_if_open()
600
assert self._handle is not None # for mypy
601
return self._handle.root
602
603
@property
604
def filename(self):
605
return self._path
606
607
def __getitem__(self, key: str):
608
return self.get(key)
609
610
def __setitem__(self, key: str, value):
611
self.put(key, value)
612
613
def __delitem__(self, key: str):
614
return self.remove(key)
615
616
def __getattr__(self, name: str):
617
"""allow attribute access to get stores"""
618
try:
619
return self.get(name)
620
except (KeyError, ClosedFileError):
621
pass
622
raise AttributeError(
623
f"'{type(self).__name__}' object has no attribute '{name}'"
624
)
625
626
def __contains__(self, key: str) -> bool:
627
"""
628
check for existence of this key
629
can match the exact pathname or the pathname w/o the leading '/'
630
"""
631
node = self.get_node(key)
632
if node is not None:
633
name = node._v_pathname
634
if name == key or name[1:] == key:
635
return True
636
return False
637
638
def __len__(self) -> int:
639
return len(self.groups())
640
641
def __repr__(self) -> str:
642
pstr = pprint_thing(self._path)
643
return f"{type(self)}\nFile path: {pstr}\n"
644
645
def __enter__(self):
646
return self
647
648
def __exit__(self, exc_type, exc_value, traceback):
649
self.close()
650
651
def keys(self, include: str = "pandas") -> list[str]:
652
"""
653
Return a list of keys corresponding to objects stored in HDFStore.
654
655
Parameters
656
----------
657
658
include : str, default 'pandas'
659
When include equals 'pandas' return pandas objects.
660
When include equals 'native' return native HDF5 Table objects.
661
662
.. versionadded:: 1.1.0
663
664
Returns
665
-------
666
list
667
List of ABSOLUTE path-names (e.g. have the leading '/').
668
669
Raises
670
------
671
raises ValueError if include has an illegal value
672
"""
673
if include == "pandas":
674
return [n._v_pathname for n in self.groups()]
675
676
elif include == "native":
677
assert self._handle is not None # mypy
678
return [
679
n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")
680
]
681
raise ValueError(
682
f"`include` should be either 'pandas' or 'native' but is '{include}'"
683
)
684
685
def __iter__(self):
686
return iter(self.keys())
687
688
def items(self):
689
"""
690
iterate on key->group
691
"""
692
for g in self.groups():
693
yield g._v_pathname, g
694
695
iteritems = items
696
697
def open(self, mode: str = "a", **kwargs):
698
"""
699
Open the file in the specified mode
700
701
Parameters
702
----------
703
mode : {'a', 'w', 'r', 'r+'}, default 'a'
704
See HDFStore docstring or tables.open_file for info about modes
705
**kwargs
706
These parameters will be passed to the PyTables open_file method.
707
"""
708
tables = _tables()
709
710
if self._mode != mode:
711
# if we are changing a write mode to read, ok
712
if self._mode in ["a", "w"] and mode in ["r", "r+"]:
713
pass
714
elif mode in ["w"]:
715
# this would truncate, raise here
716
if self.is_open:
717
raise PossibleDataLossError(
718
f"Re-opening the file [{self._path}] with mode [{self._mode}] "
719
"will delete the current file!"
720
)
721
722
self._mode = mode
723
724
# close and reopen the handle
725
if self.is_open:
726
self.close()
727
728
if self._complevel and self._complevel > 0:
729
self._filters = _tables().Filters(
730
self._complevel, self._complib, fletcher32=self._fletcher32
731
)
732
733
if _table_file_open_policy_is_strict and self.is_open:
734
msg = (
735
"Cannot open HDF5 file, which is already opened, "
736
"even in read-only mode."
737
)
738
raise ValueError(msg)
739
740
self._handle = tables.open_file(self._path, self._mode, **kwargs)
741
742
def close(self):
743
"""
744
Close the PyTables file handle
745
"""
746
if self._handle is not None:
747
self._handle.close()
748
self._handle = None
749
750
@property
751
def is_open(self) -> bool:
752
"""
753
return a boolean indicating whether the file is open
754
"""
755
if self._handle is None:
756
return False
757
return bool(self._handle.isopen)
758
759
def flush(self, fsync: bool = False):
760
"""
761
Force all buffered modifications to be written to disk.
762
763
Parameters
764
----------
765
fsync : bool (default False)
766
call ``os.fsync()`` on the file handle to force writing to disk.
767
768
Notes
769
-----
770
Without ``fsync=True``, flushing may not guarantee that the OS writes
771
to disk. With fsync, the operation will block until the OS claims the
772
file has been written; however, other caching layers may still
773
interfere.
774
"""
775
if self._handle is not None:
776
self._handle.flush()
777
if fsync:
778
with suppress(OSError):
779
os.fsync(self._handle.fileno())
780
781
def get(self, key: str):
782
"""
783
Retrieve pandas object stored in file.
784
785
Parameters
786
----------
787
key : str
788
789
Returns
790
-------
791
object
792
Same type as object stored in file.
793
"""
794
with patch_pickle():
795
# GH#31167 Without this patch, pickle doesn't know how to unpickle
796
# old DateOffset objects now that they are cdef classes.
797
group = self.get_node(key)
798
if group is None:
799
raise KeyError(f"No object named {key} in the file")
800
return self._read_group(group)
801
802
def select(
803
self,
804
key: str,
805
where=None,
806
start=None,
807
stop=None,
808
columns=None,
809
iterator=False,
810
chunksize=None,
811
auto_close: bool = False,
812
):
813
"""
814
Retrieve pandas object stored in file, optionally based on where criteria.
815
816
.. warning::
817
818
Pandas uses PyTables for reading and writing HDF5 files, which allows
819
serializing object-dtype data with pickle when using the "fixed" format.
820
Loading pickled data received from untrusted sources can be unsafe.
821
822
See: https://docs.python.org/3/library/pickle.html for more.
823
824
Parameters
825
----------
826
key : str
827
Object being retrieved from file.
828
where : list or None
829
List of Term (or convertible) objects, optional.
830
start : int or None
831
Row number to start selection.
832
stop : int, default None
833
Row number to stop selection.
834
columns : list or None
835
A list of columns that if not None, will limit the return columns.
836
iterator : bool or False
837
Returns an iterator.
838
chunksize : int or None
839
Number of rows to include in iteration, return an iterator.
840
auto_close : bool or False
841
Should automatically close the store when finished.
842
843
Returns
844
-------
845
object
846
Retrieved object from file.
847
"""
848
group = self.get_node(key)
849
if group is None:
850
raise KeyError(f"No object named {key} in the file")
851
852
# create the storer and axes
853
where = _ensure_term(where, scope_level=1)
854
s = self._create_storer(group)
855
s.infer_axes()
856
857
# function to call on iteration
858
def func(_start, _stop, _where):
859
return s.read(start=_start, stop=_stop, where=_where, columns=columns)
860
861
# create the iterator
862
it = TableIterator(
863
self,
864
s,
865
func,
866
where=where,
867
nrows=s.nrows,
868
start=start,
869
stop=stop,
870
iterator=iterator,
871
chunksize=chunksize,
872
auto_close=auto_close,
873
)
874
875
return it.get_result()
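# Illustrative usage sketch (not part of pandas; the key, columns and the
# `process` helper are hypothetical). select accepts Term-like strings and
# can stream results in chunks via TableIterator:
#
#   store.select("df", where="columns=['a', 'b']")
#   for chunk in store.select("df", chunksize=10000):
#       process(chunk)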
876
877
def select_as_coordinates(
878
self,
879
key: str,
880
where=None,
881
start: int | None = None,
882
stop: int | None = None,
883
):
884
"""
885
return the selection as an Index
886
887
.. warning::
888
889
Pandas uses PyTables for reading and writing HDF5 files, which allows
890
serializing object-dtype data with pickle when using the "fixed" format.
891
Loading pickled data received from untrusted sources can be unsafe.
892
893
See: https://docs.python.org/3/library/pickle.html for more.
894
895
896
Parameters
897
----------
898
key : str
899
where : list of Term (or convertible) objects, optional
900
start : integer (defaults to None), row number to start selection
901
stop : integer (defaults to None), row number to stop selection
902
"""
903
where = _ensure_term(where, scope_level=1)
904
tbl = self.get_storer(key)
905
if not isinstance(tbl, Table):
906
raise TypeError("can only read_coordinates with a table")
907
return tbl.read_coordinates(where=where, start=start, stop=stop)
908
909
def select_column(
910
self,
911
key: str,
912
column: str,
913
start: int | None = None,
914
stop: int | None = None,
915
):
916
"""
917
return a single column from the table. This is generally only useful to
918
select an indexable
919
920
.. warning::
921
922
Pandas uses PyTables for reading and writing HDF5 files, which allows
923
serializing object-dtype data with pickle when using the "fixed" format.
924
Loading pickled data received from untrusted sources can be unsafe.
925
926
See: https://docs.python.org/3/library/pickle.html for more.
927
928
Parameters
929
----------
930
key : str
931
column : str
932
The column of interest.
933
start : int or None, default None
934
stop : int or None, default None
935
936
Raises
937
------
938
raises KeyError if the column is not found (or key is not a valid
939
store)
940
raises ValueError if the column can not be extracted individually (it
941
is part of a data block)
942
943
"""
944
tbl = self.get_storer(key)
945
if not isinstance(tbl, Table):
946
raise TypeError("can only read_column with a table")
947
return tbl.read_column(column=column, start=start, stop=stop)
948
949
def select_as_multiple(
950
self,
951
keys,
952
where=None,
953
selector=None,
954
columns=None,
955
start=None,
956
stop=None,
957
iterator=False,
958
chunksize=None,
959
auto_close: bool = False,
960
):
961
"""
962
Retrieve pandas objects from multiple tables.
963
964
.. warning::
965
966
Pandas uses PyTables for reading and writing HDF5 files, which allows
967
serializing object-dtype data with pickle when using the "fixed" format.
968
Loading pickled data received from untrusted sources can be unsafe.
969
970
See: https://docs.python.org/3/library/pickle.html for more.
971
972
Parameters
973
----------
974
keys : a list of the tables
975
selector : the table to apply the where criteria (defaults to keys[0]
976
if not supplied)
977
columns : the columns I want back
978
start : integer (defaults to None), row number to start selection
979
stop : integer (defaults to None), row number to stop selection
980
iterator : bool, return an iterator, default False
981
chunksize : nrows to include in iteration, return an iterator
982
auto_close : bool, default False
983
Should automatically close the store when finished.
984
985
Raises
986
------
987
raises KeyError if keys or selector is not found or keys is empty
988
raises TypeError if keys is not a list or tuple
989
raises ValueError if the tables are not ALL THE SAME DIMENSIONS
990
"""
991
# default to single select
992
where = _ensure_term(where, scope_level=1)
993
if isinstance(keys, (list, tuple)) and len(keys) == 1:
994
keys = keys[0]
995
if isinstance(keys, str):
996
return self.select(
997
key=keys,
998
where=where,
999
columns=columns,
1000
start=start,
1001
stop=stop,
1002
iterator=iterator,
1003
chunksize=chunksize,
1004
auto_close=auto_close,
1005
)
1006
1007
if not isinstance(keys, (list, tuple)):
1008
raise TypeError("keys must be a list/tuple")
1009
1010
if not len(keys):
1011
raise ValueError("keys must have a non-zero length")
1012
1013
if selector is None:
1014
selector = keys[0]
1015
1016
# collect the tables
1017
tbls = [self.get_storer(k) for k in keys]
1018
s = self.get_storer(selector)
1019
1020
# validate rows
1021
nrows = None
1022
for t, k in itertools.chain([(s, selector)], zip(tbls, keys)):
1023
if t is None:
1024
raise KeyError(f"Invalid table [{k}]")
1025
if not t.is_table:
1026
raise TypeError(
1027
f"object [{t.pathname}] is not a table, and cannot be used in all "
1028
"select as multiple"
1029
)
1030
1031
if nrows is None:
1032
nrows = t.nrows
1033
elif t.nrows != nrows:
1034
raise ValueError("all tables must have exactly the same nrows!")
1035
1036
# The isinstance checks here are redundant with the check above,
1037
# but necessary for mypy; see GH#29757
1038
_tbls = [x for x in tbls if isinstance(x, Table)]
1039
1040
# axis is the concatenation axis
1041
axis = list({t.non_index_axes[0][0] for t in _tbls})[0]
1042
1043
def func(_start, _stop, _where):
1044
1045
# retrieve the objs, _where is always passed as a set of
1046
# coordinates here
1047
objs = [
1048
t.read(where=_where, columns=columns, start=_start, stop=_stop)
1049
for t in tbls
1050
]
1051
1052
# concat and return
1053
return concat(objs, axis=axis, verify_integrity=False)._consolidate()
1054
1055
# create the iterator
1056
it = TableIterator(
1057
self,
1058
s,
1059
func,
1060
where=where,
1061
nrows=nrows,
1062
start=start,
1063
stop=stop,
1064
iterator=iterator,
1065
chunksize=chunksize,
1066
auto_close=auto_close,
1067
)
1068
1069
return it.get_result(coordinates=True)
1070
1071
def put(
1072
self,
1073
key: str,
1074
value: DataFrame | Series,
1075
format=None,
1076
index=True,
1077
append=False,
1078
complib=None,
1079
complevel: int | None = None,
1080
min_itemsize: int | dict[str, int] | None = None,
1081
nan_rep=None,
1082
data_columns: Literal[True] | list[str] | None = None,
1083
encoding=None,
1084
errors: str = "strict",
1085
track_times: bool = True,
1086
dropna: bool = False,
1087
):
1088
"""
1089
Store object in HDFStore.
1090
1091
Parameters
1092
----------
1093
key : str
1094
value : {Series, DataFrame}
1095
format : 'fixed(f)|table(t)', default is 'fixed'
1096
Format to use when storing object in HDFStore. Value can be one of:
1097
1098
``'fixed'``
1099
Fixed format. Fast writing/reading. Not-appendable, nor searchable.
1100
``'table'``
1101
Table format. Write as a PyTables Table structure which may perform
1102
worse but allow more flexible operations like searching / selecting
1103
subsets of the data.
1104
append : bool, default False
1105
This will force Table format, append the input data to the existing.
1106
data_columns : list of columns or True, default None
1107
List of columns to create as data columns, or True to use all columns.
1108
See `here
1109
<https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.
1110
encoding : str, default None
1111
Provide an encoding for strings.
1112
track_times : bool, default True
1113
Parameter is propagated to 'create_table' method of 'PyTables'.
1114
If set to False it enables to have the same h5 files (same hashes)
1115
independent on creation time.
1116
1117
.. versionadded:: 1.1.0
1118
"""
1119
if format is None:
1120
format = get_option("io.hdf.default_format") or "fixed"
1121
format = self._validate_format(format)
1122
self._write_to_group(
1123
key,
1124
value,
1125
format=format,
1126
index=index,
1127
append=append,
1128
complib=complib,
1129
complevel=complevel,
1130
min_itemsize=min_itemsize,
1131
nan_rep=nan_rep,
1132
data_columns=data_columns,
1133
encoding=encoding,
1134
errors=errors,
1135
track_times=track_times,
1136
dropna=dropna,
1137
)
1138
1139
def remove(self, key: str, where=None, start=None, stop=None):
1140
"""
1141
Remove pandas object partially by specifying the where condition
1142
1143
Parameters
1144
----------
1145
key : str
1146
Node to remove or delete rows from
1147
where : list of Term (or convertible) objects, optional
1148
start : integer (defaults to None), row number to start selection
1149
stop : integer (defaults to None), row number to stop selection
1150
1151
Returns
1152
-------
1153
number of rows removed (or None if not a Table)
1154
1155
Raises
1156
------
1157
raises KeyError if key is not a valid store
1158
1159
"""
1160
where = _ensure_term(where, scope_level=1)
1161
try:
1162
s = self.get_storer(key)
1163
except KeyError:
1164
# the key is not a valid store, re-raising KeyError
1165
raise
1166
except AssertionError:
1167
# surface any assertion errors for e.g. debugging
1168
raise
1169
except Exception as err:
1170
# In tests we get here with ClosedFileError, TypeError, and
1171
# _table_mod.NoSuchNodeError. TODO: Catch only these?
1172
1173
if where is not None:
1174
raise ValueError(
1175
"trying to remove a node with a non-None where clause!"
1176
) from err
1177
1178
# we are actually trying to remove a node (with children)
1179
node = self.get_node(key)
1180
if node is not None:
1181
node._f_remove(recursive=True)
1182
return None
1183
1184
# remove the node
1185
if com.all_none(where, start, stop):
1186
s.group._f_remove(recursive=True)
1187
1188
# delete from the table
1189
else:
1190
if not s.is_table:
1191
raise ValueError(
1192
"can only remove with where on objects written as tables"
1193
)
1194
return s.delete(where=where, start=start, stop=stop)
1195
1196
def append(
1197
self,
1198
key: str,
1199
value: DataFrame | Series,
1200
format=None,
1201
axes=None,
1202
index=True,
1203
append=True,
1204
complib=None,
1205
complevel: int | None = None,
1206
columns=None,
1207
min_itemsize: int | dict[str, int] | None = None,
1208
nan_rep=None,
1209
chunksize=None,
1210
expectedrows=None,
1211
dropna: bool | None = None,
1212
data_columns: Literal[True] | list[str] | None = None,
1213
encoding=None,
1214
errors: str = "strict",
1215
):
1216
"""
1217
Append to Table in file. Node must already exist and be Table
1218
format.
1219
1220
Parameters
1221
----------
1222
key : str
1223
value : {Series, DataFrame}
1224
format : 'table' is the default
1225
Format to use when storing object in HDFStore. Value can be one of:
1226
1227
``'table'``
1228
Table format. Write as a PyTables Table structure which may perform
1229
worse but allow more flexible operations like searching / selecting
1230
subsets of the data.
1231
append : bool, default True
1232
Append the input data to the existing.
1233
data_columns : list of columns, or True, default None
1234
List of columns to create as indexed data columns for on-disk
1235
queries, or True to use all columns. By default only the axes
1236
of the object are indexed. See `here
1237
<https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.
1238
min_itemsize : dict of columns that specify minimum str sizes
1239
nan_rep : str to use as str nan representation
1240
chunksize : size to chunk the writing
1241
expectedrows : expected TOTAL row size of this table
1242
encoding : default None, provide an encoding for str
1243
dropna : bool, default False
1244
Do not write an ALL nan row to the store, settable
1245
by the option 'io.hdf.dropna_table'.
1246
1247
Notes
1248
-----
1249
Does *not* check if data being appended overlaps with existing
1250
data in the table, so be careful
1251
"""
1252
if columns is not None:
1253
raise TypeError(
1254
"columns is not a supported keyword in append, try data_columns"
1255
)
1256
1257
if dropna is None:
1258
dropna = get_option("io.hdf.dropna_table")
1259
if format is None:
1260
format = get_option("io.hdf.default_format") or "table"
1261
format = self._validate_format(format)
1262
self._write_to_group(
1263
key,
1264
value,
1265
format=format,
1266
axes=axes,
1267
index=index,
1268
append=append,
1269
complib=complib,
1270
complevel=complevel,
1271
min_itemsize=min_itemsize,
1272
nan_rep=nan_rep,
1273
chunksize=chunksize,
1274
expectedrows=expectedrows,
1275
dropna=dropna,
1276
data_columns=data_columns,
1277
encoding=encoding,
1278
errors=errors,
1279
)
1280
1281
def append_to_multiple(
1282
self,
1283
d: dict,
1284
value,
1285
selector,
1286
data_columns=None,
1287
axes=None,
1288
dropna=False,
1289
**kwargs,
1290
):
1291
"""
1292
Append to multiple tables
1293
1294
Parameters
1295
----------
1296
d : a dict of table_name to table_columns, None is acceptable as the
1297
values of one node (this will get all the remaining columns)
1298
value : a pandas object
1299
selector : a string that designates the indexable table; all of its
1300
columns will be designated as data_columns, unless data_columns is
1301
passed, in which case these are used
1302
data_columns : list of columns to create as data columns, or True to
1303
use all columns
1304
dropna : if evaluates to True, drop rows from all tables if any single
1305
row in each table has all NaN. Default False.
1306
1307
Notes
1308
-----
1309
axes parameter is currently not accepted
1310
1311
"""
1312
if axes is not None:
1313
raise TypeError(
1314
"axes is currently not accepted as a parameter to append_to_multiple; "
1315
"you can create the tables independently instead"
1316
)
1317
1318
if not isinstance(d, dict):
1319
raise ValueError(
1320
"append_to_multiple must have a dictionary specified as the "
1321
"way to split the value"
1322
)
1323
1324
if selector not in d:
1325
raise ValueError(
1326
"append_to_multiple requires a selector that is in passed dict"
1327
)
1328
1329
# figure out the splitting axis (the non_index_axis)
1330
axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0]
1331
1332
# figure out how to split the value
1333
remain_key = None
1334
remain_values: list = []
1335
for k, v in d.items():
1336
if v is None:
1337
if remain_key is not None:
1338
raise ValueError(
1339
"append_to_multiple can only have one value in d that is None"
1340
)
1341
remain_key = k
1342
else:
1343
remain_values.extend(v)
1344
if remain_key is not None:
1345
ordered = value.axes[axis]
1346
ordd = ordered.difference(Index(remain_values))
1347
ordd = sorted(ordered.get_indexer(ordd))
1348
d[remain_key] = ordered.take(ordd)
1349
1350
# data_columns
1351
if data_columns is None:
1352
data_columns = d[selector]
1353
1354
# ensure rows are synchronized across the tables
1355
if dropna:
1356
idxs = (value[cols].dropna(how="all").index for cols in d.values())
1357
valid_index = next(idxs)
1358
for index in idxs:
1359
valid_index = valid_index.intersection(index)
1360
value = value.loc[valid_index]
1361
1362
min_itemsize = kwargs.pop("min_itemsize", None)
1363
1364
# append
1365
for k, v in d.items():
1366
dc = data_columns if k == selector else None
1367
1368
# compute the val
1369
val = value.reindex(v, axis=axis)
1370
1371
filtered = (
1372
{key: value for (key, value) in min_itemsize.items() if key in v}
1373
if min_itemsize is not None
1374
else None
1375
)
1376
self.append(k, val, data_columns=dc, min_itemsize=filtered, **kwargs)
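# Illustrative usage sketch (not part of pandas; keys and column names are
# hypothetical). The selector table carries the queryable columns, and
# select_as_multiple reassembles the pieces on the shared row index:
#
#   store.append_to_multiple({"left": ["a", "b"], "right": None}, df,
#                            selector="left")
#   store.select_as_multiple(["left", "right"], where="a > 0",
#                            selector="left")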
1377
1378
def create_table_index(
1379
self,
1380
key: str,
1381
columns=None,
1382
optlevel: int | None = None,
1383
kind: str | None = None,
1384
):
1385
"""
1386
Create a pytables index on the table.
1387
1388
Parameters
1389
----------
1390
key : str
1391
columns : None, bool, or listlike[str]
1392
Indicate which columns to create an index on.
1393
1394
* False : Do not create any indexes.
1395
* True : Create indexes on all columns.
1396
* None : Create indexes on all columns.
1397
* listlike : Create indexes on the given columns.
1398
1399
optlevel : int or None, default None
1400
Optimization level, if None, pytables defaults to 6.
1401
kind : str or None, default None
1402
Kind of index, if None, pytables defaults to "medium".
1403
1404
Raises
1405
------
1406
TypeError: raises if the node is not a table
1407
"""
1408
# version requirements
1409
_tables()
1410
s = self.get_storer(key)
1411
if s is None:
1412
return
1413
1414
if not isinstance(s, Table):
1415
raise TypeError("cannot create table index on a Fixed format store")
1416
s.create_index(columns=columns, optlevel=optlevel, kind=kind)
1417
1418
def groups(self):
1419
"""
1420
Return a list of all the top-level nodes.
1421
1422
Each node returned is not a pandas storage object.
1423
1424
Returns
1425
-------
1426
list
1427
List of objects.
1428
"""
1429
_tables()
1430
self._check_if_open()
1431
assert self._handle is not None # for mypy
1432
assert _table_mod is not None # for mypy
1433
return [
1434
g
1435
for g in self._handle.walk_groups()
1436
if (
1437
not isinstance(g, _table_mod.link.Link)
1438
and (
1439
getattr(g._v_attrs, "pandas_type", None)
1440
or getattr(g, "table", None)
1441
or (isinstance(g, _table_mod.table.Table) and g._v_name != "table")
1442
)
1443
)
1444
]
1445
1446
def walk(self, where="/"):
1447
"""
1448
Walk the pytables group hierarchy for pandas objects.
1449
1450
This generator will yield the group path, subgroups and pandas object
1451
names for each group.
1452
1453
Any non-pandas PyTables objects that are not a group will be ignored.
1454
1455
The `where` group itself is listed first (preorder), then each of its
1456
child groups (following an alphanumerical order) is also traversed,
1457
following the same procedure.
1458
1459
Parameters
1460
----------
1461
where : str, default "/"
1462
Group where to start walking.
1463
1464
Yields
1465
------
1466
path : str
1467
Full path to a group (without trailing '/').
1468
groups : list
1469
Names (strings) of the groups contained in `path`.
1470
leaves : list
1471
Names (strings) of the pandas objects contained in `path`.
1472
"""
1473
_tables()
1474
self._check_if_open()
1475
assert self._handle is not None # for mypy
1476
assert _table_mod is not None # for mypy
1477
1478
for g in self._handle.walk_groups(where):
1479
if getattr(g._v_attrs, "pandas_type", None) is not None:
1480
continue
1481
1482
groups = []
1483
leaves = []
1484
for child in g._v_children.values():
1485
pandas_type = getattr(child._v_attrs, "pandas_type", None)
1486
if pandas_type is None:
1487
if isinstance(child, _table_mod.group.Group):
1488
groups.append(child._v_name)
1489
else:
1490
leaves.append(child._v_name)
1491
1492
yield (g._v_pathname.rstrip("/"), groups, leaves)
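# Illustrative usage sketch (not part of pandas; the group layout is
# hypothetical):
#
#   for path, groups, leaves in store.walk():
#       for leaf in leaves:
#           obj = store.get(f"{path}/{leaf}")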
1493
1494
def get_node(self, key: str) -> Node | None:
1495
"""return the node with the key or None if it does not exist"""
1496
self._check_if_open()
1497
if not key.startswith("/"):
1498
key = "/" + key
1499
1500
assert self._handle is not None
1501
assert _table_mod is not None # for mypy
1502
try:
1503
node = self._handle.get_node(self.root, key)
1504
except _table_mod.exceptions.NoSuchNodeError:
1505
return None
1506
1507
assert isinstance(node, _table_mod.Node), type(node)
1508
return node
1509
1510
def get_storer(self, key: str) -> GenericFixed | Table:
1511
"""return the storer object for a key, raise if not in the file"""
1512
group = self.get_node(key)
1513
if group is None:
1514
raise KeyError(f"No object named {key} in the file")
1515
1516
s = self._create_storer(group)
1517
s.infer_axes()
1518
return s
1519
1520
def copy(
1521
self,
1522
file,
1523
mode="w",
1524
propindexes: bool = True,
1525
keys=None,
1526
complib=None,
1527
complevel: int | None = None,
1528
fletcher32: bool = False,
1529
overwrite=True,
1530
):
1531
"""
1532
Copy the existing store to a new file, updating in place.
1533
1534
Parameters
1535
----------
1536
propindexes : bool, default True
1537
Restore indexes in copied file.
1538
keys : list, optional
1539
List of keys to include in the copy (defaults to all).
1540
overwrite : bool, default True
1541
Whether to overwrite (remove and replace) existing nodes in the new store.
1542
mode, complib, complevel, fletcher32 same as in HDFStore.__init__
1543
1544
Returns
1545
-------
1546
open file handle of the new store
1547
"""
1548
new_store = HDFStore(
1549
file, mode=mode, complib=complib, complevel=complevel, fletcher32=fletcher32
1550
)
1551
if keys is None:
1552
keys = list(self.keys())
1553
if not isinstance(keys, (tuple, list)):
1554
keys = [keys]
1555
for k in keys:
1556
s = self.get_storer(k)
1557
if s is not None:
1558
1559
if k in new_store:
1560
if overwrite:
1561
new_store.remove(k)
1562
1563
data = self.select(k)
1564
if isinstance(s, Table):
1565
1566
index: bool | list[str] = False
1567
if propindexes:
1568
index = [a.name for a in s.axes if a.is_indexed]
1569
new_store.append(
1570
k,
1571
data,
1572
index=index,
1573
data_columns=getattr(s, "data_columns", None),
1574
encoding=s.encoding,
1575
)
1576
else:
1577
new_store.put(k, data, encoding=s.encoding)
1578
1579
return new_store
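# Illustrative usage sketch (not part of pandas; file names are hypothetical).
# Copying to a new file is also the suggested route for upgrading stores
# written by old pandas versions (see incompatibility_doc above):
#
#   new = store.copy("store_new.h5", propindexes=True)
#   new.close()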
1580
1581
def info(self) -> str:
1582
"""
1583
Print detailed information on the store.
1584
1585
Returns
1586
-------
1587
str
1588
"""
1589
path = pprint_thing(self._path)
1590
output = f"{type(self)}\nFile path: {path}\n"
1591
1592
if self.is_open:
1593
lkeys = sorted(self.keys())
1594
if len(lkeys):
1595
keys = []
1596
values = []
1597
1598
for k in lkeys:
1599
try:
1600
s = self.get_storer(k)
1601
if s is not None:
1602
keys.append(pprint_thing(s.pathname or k))
1603
values.append(pprint_thing(s or "invalid_HDFStore node"))
1604
except AssertionError:
1605
# surface any assertion errors for e.g. debugging
1606
raise
1607
except Exception as detail:
1608
keys.append(k)
1609
dstr = pprint_thing(detail)
1610
values.append(f"[invalid_HDFStore node: {dstr}]")
1611
1612
output += adjoin(12, keys, values)
1613
else:
1614
output += "Empty"
1615
else:
1616
output += "File is CLOSED"
1617
1618
return output
1619
1620
# ------------------------------------------------------------------------
1621
# private methods
1622
1623
def _check_if_open(self):
1624
if not self.is_open:
1625
raise ClosedFileError(f"{self._path} file is not open!")
1626
1627
def _validate_format(self, format: str) -> str:
1628
"""validate / deprecate formats"""
1629
# validate
1630
try:
1631
format = _FORMAT_MAP[format.lower()]
1632
except KeyError as err:
1633
raise TypeError(f"invalid HDFStore format specified [{format}]") from err
1634
1635
return format
1636
1637
def _create_storer(
1638
self,
1639
group,
1640
format=None,
1641
value: DataFrame | Series | None = None,
1642
encoding: str = "UTF-8",
1643
errors: str = "strict",
1644
) -> GenericFixed | Table:
1645
"""return a suitable class to operate"""
1646
cls: type[GenericFixed] | type[Table]
1647
1648
if value is not None and not isinstance(value, (Series, DataFrame)):
1649
raise TypeError("value must be None, Series, or DataFrame")
1650
1651
def error(t):
1652
# return instead of raising so mypy can tell where we are raising
1653
return TypeError(
1654
f"cannot properly create the storer for: [{t}] [group->"
1655
f"{group},value->{type(value)},format->{format}"
1656
)
1657
1658
pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None))
1659
tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None))
1660
1661
# infer the pt from the passed value
1662
if pt is None:
1663
if value is None:
1664
_tables()
1665
assert _table_mod is not None # for mypy
1666
if getattr(group, "table", None) or isinstance(
1667
group, _table_mod.table.Table
1668
):
1669
pt = "frame_table"
1670
tt = "generic_table"
1671
else:
1672
raise TypeError(
1673
"cannot create a storer if the object is not existing "
1674
"nor a value are passed"
1675
)
1676
else:
1677
if isinstance(value, Series):
1678
pt = "series"
1679
else:
1680
pt = "frame"
1681
1682
# we are actually a table
1683
if format == "table":
1684
pt += "_table"
1685
1686
# a storer node
1687
if "table" not in pt:
1688
_STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed}
1689
try:
1690
cls = _STORER_MAP[pt]
1691
except KeyError as err:
1692
raise error("_STORER_MAP") from err
1693
return cls(self, group, encoding=encoding, errors=errors)
1694
1695
# existing node (and must be a table)
1696
if tt is None:
1697
# if we are a writer, determine the tt
1698
if value is not None:
1699
if pt == "series_table":
1700
index = getattr(value, "index", None)
1701
if index is not None:
1702
if index.nlevels == 1:
1703
tt = "appendable_series"
1704
elif index.nlevels > 1:
1705
tt = "appendable_multiseries"
1706
elif pt == "frame_table":
1707
index = getattr(value, "index", None)
1708
if index is not None:
1709
if index.nlevels == 1:
1710
tt = "appendable_frame"
1711
elif index.nlevels > 1:
1712
tt = "appendable_multiframe"
1713
1714
_TABLE_MAP = {
1715
"generic_table": GenericTable,
1716
"appendable_series": AppendableSeriesTable,
1717
"appendable_multiseries": AppendableMultiSeriesTable,
1718
"appendable_frame": AppendableFrameTable,
1719
"appendable_multiframe": AppendableMultiFrameTable,
1720
"worm": WORMTable,
1721
}
1722
try:
1723
cls = _TABLE_MAP[tt]
1724
except KeyError as err:
1725
raise error("_TABLE_MAP") from err
1726
1727
return cls(self, group, encoding=encoding, errors=errors)
1728
1729
def _write_to_group(
1730
self,
1731
key: str,
1732
value: DataFrame | Series,
1733
format,
1734
axes=None,
1735
index=True,
1736
append=False,
1737
complib=None,
1738
complevel: int | None = None,
1739
fletcher32=None,
1740
min_itemsize: int | dict[str, int] | None = None,
1741
chunksize=None,
1742
expectedrows=None,
1743
dropna=False,
1744
nan_rep=None,
1745
data_columns=None,
1746
encoding=None,
1747
errors: str = "strict",
1748
track_times: bool = True,
1749
) -> None:
1750
# we don't want to store a table node at all if our object is 0-len
1751
# as there are not dtypes
1752
if getattr(value, "empty", None) and (format == "table" or append):
1753
return
1754
1755
group = self._identify_group(key, append)
1756
1757
s = self._create_storer(group, format, value, encoding=encoding, errors=errors)
1758
if append:
1759
# raise if we are trying to append to a Fixed format,
1760
# or a table that exists (and we are putting)
1761
if not s.is_table or (s.is_table and format == "fixed" and s.is_exists):
1762
raise ValueError("Can only append to Tables")
1763
if not s.is_exists:
1764
s.set_object_info()
1765
else:
1766
s.set_object_info()
1767
1768
if not s.is_table and complib:
1769
raise ValueError("Compression not supported on Fixed format stores")
1770
1771
# write the object
1772
s.write(
1773
obj=value,
1774
axes=axes,
1775
append=append,
1776
complib=complib,
1777
complevel=complevel,
1778
fletcher32=fletcher32,
1779
min_itemsize=min_itemsize,
1780
chunksize=chunksize,
1781
expectedrows=expectedrows,
1782
dropna=dropna,
1783
nan_rep=nan_rep,
1784
data_columns=data_columns,
1785
track_times=track_times,
1786
)
1787
1788
if isinstance(s, Table) and index:
1789
s.create_index(columns=index)
1790
1791
def _read_group(self, group: Node):
1792
s = self._create_storer(group)
1793
s.infer_axes()
1794
return s.read()
1795
1796
def _identify_group(self, key: str, append: bool) -> Node:
1797
"""Identify HDF5 group based on key, delete/create group if needed."""
1798
group = self.get_node(key)
1799
1800
# we make this assertion for mypy; the get_node call will already
1801
# have raised if this is incorrect
1802
assert self._handle is not None
1803
1804
# remove the node if we are not appending
1805
if group is not None and not append:
1806
self._handle.remove_node(group, recursive=True)
1807
group = None
1808
1809
if group is None:
1810
group = self._create_nodes_and_group(key)
1811
1812
return group
1813
1814
def _create_nodes_and_group(self, key: str) -> Node:
1815
"""Create nodes from key and return group name."""
1816
# assertion for mypy
1817
assert self._handle is not None
1818
1819
paths = key.split("/")
1820
# recursively create the groups
1821
path = "/"
1822
for p in paths:
1823
if not len(p):
1824
continue
1825
new_path = path
1826
if not path.endswith("/"):
1827
new_path += "/"
1828
new_path += p
1829
group = self.get_node(new_path)
1830
if group is None:
1831
group = self._handle.create_group(path, p)
1832
path = new_path
1833
return group
1834
1835
1836
class TableIterator:
1837
"""
1838
Define the iteration interface on a table
1839
1840
Parameters
1841
----------
1842
store : HDFStore
1843
s : the referred storer
1844
func : the function to execute the query
1845
where : the where of the query
1846
nrows : the rows to iterate on
1847
start : the passed start value (default is None)
1848
stop : the passed stop value (default is None)
1849
iterator : bool, default False
1850
Whether to use the default iterator.
1851
chunksize : the passed chunking value (default is 100000)
1852
auto_close : bool, default False
1853
Whether to automatically close the store at the end of iteration.
1854
"""
1855
1856
chunksize: int | None
1857
store: HDFStore
1858
s: GenericFixed | Table
1859
1860
def __init__(
1861
self,
1862
store: HDFStore,
1863
s: GenericFixed | Table,
1864
func,
1865
where,
1866
nrows,
1867
start=None,
1868
stop=None,
1869
iterator: bool = False,
1870
chunksize: int | None = None,
1871
auto_close: bool = False,
1872
):
1873
self.store = store
1874
self.s = s
1875
self.func = func
1876
self.where = where
1877
1878
# set start/stop if they are not set if we are a table
1879
if self.s.is_table:
1880
if nrows is None:
1881
nrows = 0
1882
if start is None:
1883
start = 0
1884
if stop is None:
1885
stop = nrows
1886
stop = min(nrows, stop)
1887
1888
self.nrows = nrows
1889
self.start = start
1890
self.stop = stop
1891
1892
self.coordinates = None
1893
if iterator or chunksize is not None:
1894
if chunksize is None:
1895
chunksize = 100000
1896
self.chunksize = int(chunksize)
1897
else:
1898
self.chunksize = None
1899
1900
self.auto_close = auto_close
1901
1902
def __iter__(self):
1903
# iterate
1904
current = self.start
1905
if self.coordinates is None:
1906
raise ValueError("Cannot iterate until get_result is called.")
1907
while current < self.stop:
1908
stop = min(current + self.chunksize, self.stop)
1909
value = self.func(None, None, self.coordinates[current:stop])
1910
current = stop
1911
if value is None or not len(value):
1912
continue
1913
1914
yield value
1915
1916
self.close()
1917
1918
def close(self):
1919
if self.auto_close:
1920
self.store.close()
1921
1922
def get_result(self, coordinates: bool = False):
1923
# return the actual iterator
1924
if self.chunksize is not None:
1925
if not isinstance(self.s, Table):
1926
raise TypeError("can only use an iterator or chunksize on a table")
1927
1928
self.coordinates = self.s.read_coordinates(where=self.where)
1929
1930
return self
1931
1932
# if specified read via coordinates (necessary for multiple selections
1933
if coordinates:
1934
if not isinstance(self.s, Table):
1935
raise TypeError("can only read_coordinates on a table")
1936
where = self.s.read_coordinates(
1937
where=self.where, start=self.start, stop=self.stop
1938
)
1939
else:
1940
where = self.where
1941
1942
# directly return the result
1943
results = self.func(self.start, self.stop, where)
1944
self.close()
1945
return results
1946
1947
1948
class IndexCol:
1949
"""
1950
an index column description class
1951
1952
Parameters
1953
----------
1954
axis : axis which I reference
1955
values : the ndarray like converted values
1956
kind : a string description of this type
1957
typ : the pytables type
1958
pos : the position in the pytables
1959
1960
"""
1961
1962
is_an_indexable = True
1963
is_data_indexable = True
1964
_info_fields = ["freq", "tz", "index_name"]
1965
1966
name: str
1967
cname: str
1968
1969
def __init__(
1970
self,
1971
name: str,
1972
values=None,
1973
kind=None,
1974
typ=None,
1975
cname: str | None = None,
1976
axis=None,
1977
pos=None,
1978
freq=None,
1979
tz=None,
1980
index_name=None,
1981
ordered=None,
1982
table=None,
1983
meta=None,
1984
metadata=None,
1985
):
1986
1987
if not isinstance(name, str):
1988
raise ValueError("`name` must be a str.")
1989
1990
self.values = values
1991
self.kind = kind
1992
self.typ = typ
1993
self.name = name
1994
self.cname = cname or name
1995
self.axis = axis
1996
self.pos = pos
1997
self.freq = freq
1998
self.tz = tz
1999
self.index_name = index_name
2000
self.ordered = ordered
2001
self.table = table
2002
self.meta = meta
2003
self.metadata = metadata
2004
2005
if pos is not None:
2006
self.set_pos(pos)
2007
2008
# These are ensured as long as the passed arguments match the
2009
# constructor annotations.
2010
assert isinstance(self.name, str)
2011
assert isinstance(self.cname, str)
2012
2013
@property
2014
def itemsize(self) -> int:
2015
# Assumes self.typ has already been initialized
2016
return self.typ.itemsize
2017
2018
@property
2019
def kind_attr(self) -> str:
2020
return f"{self.name}_kind"
2021
2022
def set_pos(self, pos: int):
2023
"""set the position of this column in the Table"""
2024
self.pos = pos
2025
if pos is not None and self.typ is not None:
2026
self.typ._v_pos = pos
2027
2028
def __repr__(self) -> str:
2029
temp = tuple(
2030
map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind))
2031
)
2032
return ",".join(
2033
[
2034
f"{key}->{value}"
2035
for key, value in zip(["name", "cname", "axis", "pos", "kind"], temp)
2036
]
2037
)
2038
2039
def __eq__(self, other: Any) -> bool:
2040
"""compare 2 col items"""
2041
return all(
2042
getattr(self, a, None) == getattr(other, a, None)
2043
for a in ["name", "cname", "axis", "pos"]
2044
)
2045
2046
def __ne__(self, other) -> bool:
2047
return not self.__eq__(other)
2048
2049
@property
2050
def is_indexed(self) -> bool:
2051
"""return whether I am an indexed column"""
2052
if not hasattr(self.table, "cols"):
2053
# e.g. if infer hasn't been called yet, self.table will be None.
2054
return False
2055
return getattr(self.table.cols, self.cname).is_indexed
2056
2057
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2058
"""
2059
Convert the data from this selection to the appropriate pandas type.
2060
"""
2061
assert isinstance(values, np.ndarray), type(values)
2062
2063
# values is a recarray
2064
if values.dtype.fields is not None:
2065
values = values[self.cname]
2066
2067
val_kind = _ensure_decoded(self.kind)
2068
values = _maybe_convert(values, val_kind, encoding, errors)
2069
2070
kwargs = {}
2071
kwargs["name"] = _ensure_decoded(self.index_name)
2072
2073
if self.freq is not None:
2074
kwargs["freq"] = _ensure_decoded(self.freq)
2075
2076
factory: type[Index] | type[DatetimeIndex] = Index
2077
if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype):
2078
factory = DatetimeIndex
2079
elif values.dtype == "i8" and "freq" in kwargs:
2080
# PeriodIndex data is stored as i8
2081
# error: Incompatible types in assignment (expression has type
2082
# "Callable[[Any, KwArg(Any)], PeriodIndex]", variable has type
2083
# "Union[Type[Index], Type[DatetimeIndex]]")
2084
factory = lambda x, **kwds: PeriodIndex( # type: ignore[assignment]
2085
ordinal=x, **kwds
2086
)
2087
2088
# making an Index instance could throw a number of different errors
2089
try:
2090
new_pd_index = factory(values, **kwargs)
2091
except ValueError:
2092
# if the output freq is different that what we recorded,
2093
# it should be None (see also 'doc example part 2')
2094
if "freq" in kwargs:
2095
kwargs["freq"] = None
2096
new_pd_index = factory(values, **kwargs)
2097
final_pd_index = _set_tz(new_pd_index, self.tz)
2098
return final_pd_index, final_pd_index
2099
2100
def take_data(self):
2101
"""return the values"""
2102
return self.values
2103
2104
@property
2105
def attrs(self):
2106
return self.table._v_attrs
2107
2108
@property
2109
def description(self):
2110
return self.table.description
2111
2112
@property
2113
def col(self):
2114
"""return my current col description"""
2115
return getattr(self.description, self.cname, None)
2116
2117
@property
2118
def cvalues(self):
2119
"""return my cython values"""
2120
return self.values
2121
2122
def __iter__(self):
2123
return iter(self.values)
2124
2125
def maybe_set_size(self, min_itemsize=None):
2126
"""
2127
maybe set a string col itemsize:
2128
            min_itemsize can be an integer or a dict with this column's name
2129
with an integer size
2130
"""
2131
if _ensure_decoded(self.kind) == "string":
2132
if isinstance(min_itemsize, dict):
2133
min_itemsize = min_itemsize.get(self.name)
2134
2135
if min_itemsize is not None and self.typ.itemsize < min_itemsize:
2136
self.typ = _tables().StringCol(itemsize=min_itemsize, pos=self.pos)
2137
2138
def validate_names(self):
2139
pass
2140
2141
def validate_and_set(self, handler: AppendableTable, append: bool):
2142
self.table = handler.table
2143
self.validate_col()
2144
self.validate_attr(append)
2145
self.validate_metadata(handler)
2146
self.write_metadata(handler)
2147
self.set_attr()
2148
2149
def validate_col(self, itemsize=None):
2150
"""validate this column: return the compared against itemsize"""
2151
# validate this column for string truncation (or reset to the max size)
2152
if _ensure_decoded(self.kind) == "string":
2153
c = self.col
2154
if c is not None:
2155
if itemsize is None:
2156
itemsize = self.itemsize
2157
if c.itemsize < itemsize:
2158
raise ValueError(
2159
f"Trying to store a string with len [{itemsize}] in "
2160
f"[{self.cname}] column but\nthis column has a limit of "
2161
f"[{c.itemsize}]!\nConsider using min_itemsize to "
2162
"preset the sizes on these columns"
2163
)
2164
return c.itemsize
2165
2166
return None
2167
2168
def validate_attr(self, append: bool):
2169
# check for backwards incompatibility
2170
if append:
2171
existing_kind = getattr(self.attrs, self.kind_attr, None)
2172
if existing_kind is not None and existing_kind != self.kind:
2173
raise TypeError(
2174
f"incompatible kind in col [{existing_kind} - {self.kind}]"
2175
)
2176
2177
def update_info(self, info):
2178
"""
2179
set/update the info for this indexable with the key/value
2180
if there is a conflict raise/warn as needed
2181
"""
2182
for key in self._info_fields:
2183
2184
value = getattr(self, key, None)
2185
idx = info.setdefault(self.name, {})
2186
2187
existing_value = idx.get(key)
2188
if key in idx and value is not None and existing_value != value:
2189
# frequency/name just warn
2190
if key in ["freq", "index_name"]:
2191
ws = attribute_conflict_doc % (key, existing_value, value)
2192
warnings.warn(
2193
ws, AttributeConflictWarning, stacklevel=find_stack_level()
2194
)
2195
2196
# reset
2197
idx[key] = None
2198
setattr(self, key, None)
2199
2200
else:
2201
raise ValueError(
2202
f"invalid info for [{self.name}] for [{key}], "
2203
f"existing_value [{existing_value}] conflicts with "
2204
f"new value [{value}]"
2205
)
2206
else:
2207
if value is not None or existing_value is not None:
2208
idx[key] = value
2209
2210
def set_info(self, info):
2211
"""set my state from the passed info"""
2212
idx = info.get(self.name)
2213
if idx is not None:
2214
self.__dict__.update(idx)
2215
2216
def set_attr(self):
2217
"""set the kind for this column"""
2218
setattr(self.attrs, self.kind_attr, self.kind)
2219
2220
def validate_metadata(self, handler: AppendableTable):
2221
"""validate that kind=category does not change the categories"""
2222
if self.meta == "category":
2223
new_metadata = self.metadata
2224
cur_metadata = handler.read_metadata(self.cname)
2225
if (
2226
new_metadata is not None
2227
and cur_metadata is not None
2228
and not array_equivalent(new_metadata, cur_metadata)
2229
):
2230
raise ValueError(
2231
"cannot append a categorical with "
2232
"different categories to the existing"
2233
)
2234
2235
def write_metadata(self, handler: AppendableTable):
2236
"""set the meta data"""
2237
if self.metadata is not None:
2238
handler.write_metadata(self.cname, self.metadata)
2239
2240
2241
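
# Editor's note: illustrative sketch, not part of pandas.  ``validate_col``
# above is the check that raises when an appended string no longer fits the
# column width chosen at creation time; ``min_itemsize`` is the public knob
# for presetting that width.  The helper name and "demo.h5" are made up.
def _example_min_itemsize(path: str = "demo.h5") -> None:
    import pandas as pd

    with pd.HDFStore(path, mode="w") as store:
        # Reserve 30 bytes for the "name" column up front; without this,
        # appending a longer string later raises the ValueError built in
        # ``validate_col`` above.
        store.append("df", pd.DataFrame({"name": ["ab"]}), min_itemsize={"name": 30})
        store.append("df", pd.DataFrame({"name": ["a considerably longer value"]}))

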
class GenericIndexCol(IndexCol):
2242
"""an index which is not represented in the data of the table"""
2243
2244
@property
2245
def is_indexed(self) -> bool:
2246
return False
2247
2248
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2249
"""
2250
Convert the data from this selection to the appropriate pandas type.
2251
2252
Parameters
2253
----------
2254
values : np.ndarray
2255
nan_rep : str
2256
encoding : str
2257
errors : str
2258
"""
2259
assert isinstance(values, np.ndarray), type(values)
2260
2261
# error: Incompatible types in assignment (expression has type
2262
# "Int64Index", variable has type "ndarray")
2263
values = Int64Index(np.arange(len(values))) # type: ignore[assignment]
2264
return values, values
2265
2266
def set_attr(self):
2267
pass
2268
2269
2270
class DataCol(IndexCol):
2271
"""
2272
a data holding column, by definition this is not indexable
2273
2274
Parameters
2275
----------
2276
data : the actual data
2277
cname : the column name in the table to hold the data (typically
2278
values)
2279
meta : a string description of the metadata
2280
metadata : the actual metadata
2281
"""
2282
2283
is_an_indexable = False
2284
is_data_indexable = False
2285
_info_fields = ["tz", "ordered"]
2286
2287
def __init__(
2288
self,
2289
name: str,
2290
values=None,
2291
kind=None,
2292
typ=None,
2293
cname=None,
2294
pos=None,
2295
tz=None,
2296
ordered=None,
2297
table=None,
2298
meta=None,
2299
metadata=None,
2300
dtype: DtypeArg | None = None,
2301
data=None,
2302
):
2303
super().__init__(
2304
name=name,
2305
values=values,
2306
kind=kind,
2307
typ=typ,
2308
pos=pos,
2309
cname=cname,
2310
tz=tz,
2311
ordered=ordered,
2312
table=table,
2313
meta=meta,
2314
metadata=metadata,
2315
)
2316
self.dtype = dtype
2317
self.data = data
2318
2319
@property
2320
def dtype_attr(self) -> str:
2321
return f"{self.name}_dtype"
2322
2323
@property
2324
def meta_attr(self) -> str:
2325
return f"{self.name}_meta"
2326
2327
def __repr__(self) -> str:
2328
temp = tuple(
2329
map(
2330
pprint_thing, (self.name, self.cname, self.dtype, self.kind, self.shape)
2331
)
2332
)
2333
return ",".join(
2334
[
2335
f"{key}->{value}"
2336
for key, value in zip(["name", "cname", "dtype", "kind", "shape"], temp)
2337
]
2338
)
2339
2340
def __eq__(self, other: Any) -> bool:
2341
"""compare 2 col items"""
2342
return all(
2343
getattr(self, a, None) == getattr(other, a, None)
2344
for a in ["name", "cname", "dtype", "pos"]
2345
)
2346
2347
def set_data(self, data: ArrayLike):
2348
assert data is not None
2349
assert self.dtype is None
2350
2351
data, dtype_name = _get_data_and_dtype_name(data)
2352
2353
self.data = data
2354
self.dtype = dtype_name
2355
self.kind = _dtype_to_kind(dtype_name)
2356
2357
def take_data(self):
2358
"""return the data"""
2359
return self.data
2360
2361
@classmethod
2362
def _get_atom(cls, values: ArrayLike) -> Col:
2363
"""
2364
Get an appropriately typed and shaped pytables.Col object for values.
2365
"""
2366
dtype = values.dtype
2367
# error: Item "ExtensionDtype" of "Union[ExtensionDtype, dtype[Any]]" has no
2368
# attribute "itemsize"
2369
itemsize = dtype.itemsize # type: ignore[union-attr]
2370
2371
shape = values.shape
2372
if values.ndim == 1:
2373
# EA, use block shape pretending it is 2D
2374
# TODO(EA2D): not necessary with 2D EAs
2375
shape = (1, values.size)
2376
2377
if isinstance(values, Categorical):
2378
codes = values.codes
2379
atom = cls.get_atom_data(shape, kind=codes.dtype.name)
2380
elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
2381
atom = cls.get_atom_datetime64(shape)
2382
elif is_timedelta64_dtype(dtype):
2383
atom = cls.get_atom_timedelta64(shape)
2384
elif is_complex_dtype(dtype):
2385
atom = _tables().ComplexCol(itemsize=itemsize, shape=shape[0])
2386
elif is_string_dtype(dtype):
2387
atom = cls.get_atom_string(shape, itemsize)
2388
else:
2389
atom = cls.get_atom_data(shape, kind=dtype.name)
2390
2391
return atom
2392
2393
@classmethod
2394
def get_atom_string(cls, shape, itemsize):
2395
return _tables().StringCol(itemsize=itemsize, shape=shape[0])
2396
2397
@classmethod
2398
def get_atom_coltype(cls, kind: str) -> type[Col]:
2399
"""return the PyTables column class for this column"""
2400
if kind.startswith("uint"):
2401
k4 = kind[4:]
2402
col_name = f"UInt{k4}Col"
2403
elif kind.startswith("period"):
2404
# we store as integer
2405
col_name = "Int64Col"
2406
else:
2407
kcap = kind.capitalize()
2408
col_name = f"{kcap}Col"
2409
2410
return getattr(_tables(), col_name)
2411
2412
@classmethod
2413
def get_atom_data(cls, shape, kind: str) -> Col:
2414
return cls.get_atom_coltype(kind=kind)(shape=shape[0])
2415
2416
@classmethod
2417
def get_atom_datetime64(cls, shape):
2418
return _tables().Int64Col(shape=shape[0])
2419
2420
@classmethod
2421
def get_atom_timedelta64(cls, shape):
2422
return _tables().Int64Col(shape=shape[0])
2423
2424
@property
2425
def shape(self):
2426
return getattr(self.data, "shape", None)
2427
2428
@property
2429
def cvalues(self):
2430
"""return my cython values"""
2431
return self.data
2432
2433
def validate_attr(self, append):
2434
"""validate that we have the same order as the existing & same dtype"""
2435
if append:
2436
existing_fields = getattr(self.attrs, self.kind_attr, None)
2437
if existing_fields is not None and existing_fields != list(self.values):
2438
raise ValueError("appended items do not match existing items in table!")
2439
2440
existing_dtype = getattr(self.attrs, self.dtype_attr, None)
2441
if existing_dtype is not None and existing_dtype != self.dtype:
2442
raise ValueError(
2443
"appended items dtype do not match existing items dtype in table!"
2444
)
2445
2446
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2447
"""
2448
Convert the data from this selection to the appropriate pandas type.
2449
2450
Parameters
2451
----------
2452
values : np.ndarray
2453
nan_rep :
2454
encoding : str
2455
errors : str
2456
2457
Returns
2458
-------
2459
index : listlike to become an Index
2460
data : ndarraylike to become a column
2461
"""
2462
assert isinstance(values, np.ndarray), type(values)
2463
2464
# values is a recarray
2465
if values.dtype.fields is not None:
2466
values = values[self.cname]
2467
2468
assert self.typ is not None
2469
if self.dtype is None:
2470
# Note: in tests we never have timedelta64 or datetime64,
2471
# so the _get_data_and_dtype_name may be unnecessary
2472
converted, dtype_name = _get_data_and_dtype_name(values)
2473
kind = _dtype_to_kind(dtype_name)
2474
else:
2475
converted = values
2476
dtype_name = self.dtype
2477
kind = self.kind
2478
2479
assert isinstance(converted, np.ndarray) # for mypy
2480
2481
# use the meta if needed
2482
meta = _ensure_decoded(self.meta)
2483
metadata = self.metadata
2484
ordered = self.ordered
2485
tz = self.tz
2486
2487
assert dtype_name is not None
2488
# convert to the correct dtype
2489
dtype = _ensure_decoded(dtype_name)
2490
2491
# reverse converts
2492
if dtype == "datetime64":
2493
# recreate with tz if indicated
2494
converted = _set_tz(converted, tz, coerce=True)
2495
2496
elif dtype == "timedelta64":
2497
converted = np.asarray(converted, dtype="m8[ns]")
2498
elif dtype == "date":
2499
try:
2500
converted = np.asarray(
2501
[date.fromordinal(v) for v in converted], dtype=object
2502
)
2503
except ValueError:
2504
converted = np.asarray(
2505
[date.fromtimestamp(v) for v in converted], dtype=object
2506
)
2507
2508
elif meta == "category":
2509
# we have a categorical
2510
categories = metadata
2511
codes = converted.ravel()
2512
2513
# if we have stored a NaN in the categories
2514
# then strip it; in theory we could have BOTH
2515
# -1s in the codes and nulls :<
2516
if categories is None:
2517
# Handle case of NaN-only categorical columns in which case
2518
# the categories are an empty array; when this is stored,
2519
# pytables cannot write a zero-len array, so on readback
2520
# the categories would be None and `read_hdf()` would fail.
2521
categories = Index([], dtype=np.float64)
2522
else:
2523
mask = isna(categories)
2524
if mask.any():
2525
categories = categories[~mask]
2526
codes[codes != -1] -= mask.astype(int).cumsum()._values
2527
2528
converted = Categorical.from_codes(
2529
codes, categories=categories, ordered=ordered
2530
)
2531
2532
else:
2533
2534
try:
2535
converted = converted.astype(dtype, copy=False)
2536
except TypeError:
2537
converted = converted.astype("O", copy=False)
2538
2539
# convert nans / decode
2540
if _ensure_decoded(kind) == "string":
2541
converted = _unconvert_string_array(
2542
converted, nan_rep=nan_rep, encoding=encoding, errors=errors
2543
)
2544
2545
return self.values, converted
2546
2547
def set_attr(self):
2548
"""set the data for this column"""
2549
setattr(self.attrs, self.kind_attr, self.values)
2550
setattr(self.attrs, self.meta_attr, self.meta)
2551
assert self.dtype is not None
2552
setattr(self.attrs, self.dtype_attr, self.dtype)
2553
2554
2555
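
# Editor's note: illustrative sketch, not part of pandas.  ``DataIndexableCol``
# below is what a column becomes when it is listed in ``data_columns``; only
# such columns (and the index) can appear in a ``where`` clause.  The helper
# name and "demo.h5" are made up.
def _example_data_columns(path: str = "demo.h5") -> None:
    import pandas as pd

    df = pd.DataFrame({"x": range(5), "y": list("aabbc")})
    with pd.HDFStore(path, mode="w") as store:
        store.append("df", df, data_columns=["y"])
        # "y" is stored as its own queryable column, so it can be filtered
        # inside PyTables; "x" lives in a values block and cannot.
        subset = store.select("df", where='y == "b"')
        print(subset)

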
class DataIndexableCol(DataCol):
2556
"""represent a data column that can be indexed"""
2557
2558
is_data_indexable = True
2559
2560
def validate_names(self):
2561
if not Index(self.values).is_object():
2562
# TODO: should the message here be more specifically non-str?
2563
raise ValueError("cannot have non-object label DataIndexableCol")
2564
2565
@classmethod
2566
def get_atom_string(cls, shape, itemsize):
2567
return _tables().StringCol(itemsize=itemsize)
2568
2569
@classmethod
2570
def get_atom_data(cls, shape, kind: str) -> Col:
2571
return cls.get_atom_coltype(kind=kind)()
2572
2573
@classmethod
2574
def get_atom_datetime64(cls, shape):
2575
return _tables().Int64Col()
2576
2577
@classmethod
2578
def get_atom_timedelta64(cls, shape):
2579
return _tables().Int64Col()
2580
2581
2582
class GenericDataIndexableCol(DataIndexableCol):
2583
"""represent a generic pytables data column"""
2584
2585
pass
2586
2587
2588
class Fixed:
2589
"""
2590
represent an object in my store
2591
facilitate read/write of various types of objects
2592
this is an abstract base class
2593
2594
Parameters
2595
----------
2596
parent : HDFStore
2597
group : Node
2598
The group node where the table resides.
2599
"""
2600
2601
pandas_kind: str
2602
format_type: str = "fixed" # GH#30962 needed by dask
2603
obj_type: type[DataFrame | Series]
2604
ndim: int
2605
encoding: str
2606
parent: HDFStore
2607
group: Node
2608
errors: str
2609
is_table = False
2610
2611
def __init__(
2612
self,
2613
parent: HDFStore,
2614
group: Node,
2615
encoding: str = "UTF-8",
2616
errors: str = "strict",
2617
):
2618
assert isinstance(parent, HDFStore), type(parent)
2619
assert _table_mod is not None # needed for mypy
2620
assert isinstance(group, _table_mod.Node), type(group)
2621
self.parent = parent
2622
self.group = group
2623
self.encoding = _ensure_encoding(encoding)
2624
self.errors = errors
2625
2626
@property
2627
def is_old_version(self) -> bool:
2628
return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1
2629
2630
@property
2631
def version(self) -> tuple[int, int, int]:
2632
"""compute and set our version"""
2633
version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None))
2634
try:
2635
version = tuple(int(x) for x in version.split("."))
2636
if len(version) == 2:
2637
version = version + (0,)
2638
except AttributeError:
2639
version = (0, 0, 0)
2640
return version
2641
2642
@property
2643
def pandas_type(self):
2644
return _ensure_decoded(getattr(self.group._v_attrs, "pandas_type", None))
2645
2646
def __repr__(self) -> str:
2647
"""return a pretty representation of myself"""
2648
self.infer_axes()
2649
s = self.shape
2650
if s is not None:
2651
if isinstance(s, (list, tuple)):
2652
jshape = ",".join([pprint_thing(x) for x in s])
2653
s = f"[{jshape}]"
2654
return f"{self.pandas_type:12.12} (shape->{s})"
2655
return self.pandas_type
2656
2657
def set_object_info(self):
2658
"""set my pandas type & version"""
2659
self.attrs.pandas_type = str(self.pandas_kind)
2660
self.attrs.pandas_version = str(_version)
2661
2662
def copy(self):
2663
new_self = copy.copy(self)
2664
return new_self
2665
2666
@property
2667
def shape(self):
2668
return self.nrows
2669
2670
@property
2671
def pathname(self):
2672
return self.group._v_pathname
2673
2674
@property
2675
def _handle(self):
2676
return self.parent._handle
2677
2678
@property
2679
def _filters(self):
2680
return self.parent._filters
2681
2682
@property
2683
def _complevel(self) -> int:
2684
return self.parent._complevel
2685
2686
@property
2687
def _fletcher32(self) -> bool:
2688
return self.parent._fletcher32
2689
2690
@property
2691
def attrs(self):
2692
return self.group._v_attrs
2693
2694
def set_attrs(self):
2695
"""set our object attributes"""
2696
pass
2697
2698
def get_attrs(self):
2699
"""get our object attributes"""
2700
pass
2701
2702
@property
2703
def storable(self):
2704
"""return my storable"""
2705
return self.group
2706
2707
@property
2708
def is_exists(self) -> bool:
2709
return False
2710
2711
@property
2712
def nrows(self):
2713
return getattr(self.storable, "nrows", None)
2714
2715
def validate(self, other):
2716
"""validate against an existing storable"""
2717
if other is None:
2718
return
2719
return True
2720
2721
def validate_version(self, where=None):
2722
"""are we trying to operate on an old version?"""
2723
return True
2724
2725
def infer_axes(self):
2726
"""
2727
infer the axes of my storer
2728
return a boolean indicating if we have a valid storer or not
2729
"""
2730
s = self.storable
2731
if s is None:
2732
return False
2733
self.get_attrs()
2734
return True
2735
2736
def read(
2737
self,
2738
where=None,
2739
columns=None,
2740
start: int | None = None,
2741
stop: int | None = None,
2742
):
2743
raise NotImplementedError(
2744
"cannot read on an abstract storer: subclasses should implement"
2745
)
2746
2747
def write(self, **kwargs):
2748
raise NotImplementedError(
2749
"cannot write on an abstract storer: subclasses should implement"
2750
)
2751
2752
def delete(self, where=None, start: int | None = None, stop: int | None = None):
2753
"""
2754
support fully deleting the node in its entirety (only) - where
2755
specification must be None
2756
"""
2757
if com.all_none(where, start, stop):
2758
self._handle.remove_node(self.group, recursive=True)
2759
return None
2760
2761
raise TypeError("cannot delete on an abstract storer")
2762
2763
2764
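
# Editor's note: illustrative sketch, not part of pandas.  ``Fixed.delete``
# above only supports removing the whole node; row-wise deletion with a
# ``where`` clause is implemented further down on the table classes.  The
# helper name and "demo.h5" are made up.
def _example_remove(path: str = "demo.h5") -> None:
    import pandas as pd

    df = pd.DataFrame({"a": range(5)})
    with pd.HDFStore(path, mode="w") as store:
        store.put("fixed_df", df)  # default fixed format
        store.append("table_df", df)  # table format
        store.remove("fixed_df")  # whole-node delete works for any format
        store.remove("table_df", where="index > 2")  # row-wise needs a table

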
class GenericFixed(Fixed):
2765
"""a generified fixed version"""
2766
2767
_index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"}
2768
_reverse_index_map = {v: k for k, v in _index_type_map.items()}
2769
attributes: list[str] = []
2770
2771
# indexer helpers
2772
def _class_to_alias(self, cls) -> str:
2773
return self._index_type_map.get(cls, "")
2774
2775
def _alias_to_class(self, alias):
2776
if isinstance(alias, type): # pragma: no cover
2777
# compat: for a short period of time master stored types
2778
return alias
2779
return self._reverse_index_map.get(alias, Index)
2780
2781
def _get_index_factory(self, attrs):
2782
index_class = self._alias_to_class(
2783
_ensure_decoded(getattr(attrs, "index_class", ""))
2784
)
2785
2786
factory: Callable
2787
2788
if index_class == DatetimeIndex:
2789
2790
def f(values, freq=None, tz=None):
2791
# data are already in UTC, localize and convert if tz present
2792
dta = DatetimeArray._simple_new(values.values, freq=freq)
2793
result = DatetimeIndex._simple_new(dta, name=None)
2794
if tz is not None:
2795
result = result.tz_localize("UTC").tz_convert(tz)
2796
return result
2797
2798
factory = f
2799
elif index_class == PeriodIndex:
2800
2801
def f(values, freq=None, tz=None):
2802
parr = PeriodArray._simple_new(values, freq=freq)
2803
return PeriodIndex._simple_new(parr, name=None)
2804
2805
factory = f
2806
else:
2807
factory = index_class
2808
2809
kwargs = {}
2810
if "freq" in attrs:
2811
kwargs["freq"] = attrs["freq"]
2812
if index_class is Index:
2813
# DTI/PI would be gotten by _alias_to_class
2814
factory = TimedeltaIndex
2815
2816
if "tz" in attrs:
2817
if isinstance(attrs["tz"], bytes):
2818
# created by python2
2819
kwargs["tz"] = attrs["tz"].decode("utf-8")
2820
else:
2821
# created by python3
2822
kwargs["tz"] = attrs["tz"]
2823
assert index_class is DatetimeIndex # just checking
2824
2825
return factory, kwargs
2826
2827
def validate_read(self, columns, where):
2828
"""
2829
        raise if any keywords are passed which are not None
2830
"""
2831
if columns is not None:
2832
raise TypeError(
2833
"cannot pass a column specification when reading "
2834
"a Fixed format store. this store must be selected in its entirety"
2835
)
2836
if where is not None:
2837
raise TypeError(
2838
"cannot pass a where specification when reading "
2839
"from a Fixed format store. this store must be selected in its entirety"
2840
)
2841
2842
@property
2843
def is_exists(self) -> bool:
2844
return True
2845
2846
def set_attrs(self):
2847
"""set our object attributes"""
2848
self.attrs.encoding = self.encoding
2849
self.attrs.errors = self.errors
2850
2851
def get_attrs(self):
2852
"""retrieve our attributes"""
2853
self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None))
2854
self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict"))
2855
for n in self.attributes:
2856
setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None)))
2857
2858
def write(self, obj, **kwargs):
2859
self.set_attrs()
2860
2861
def read_array(self, key: str, start: int | None = None, stop: int | None = None):
2862
"""read an array for the specified node (off of group"""
2863
import tables
2864
2865
node = getattr(self.group, key)
2866
attrs = node._v_attrs
2867
2868
transposed = getattr(attrs, "transposed", False)
2869
2870
if isinstance(node, tables.VLArray):
2871
ret = node[0][start:stop]
2872
else:
2873
dtype = _ensure_decoded(getattr(attrs, "value_type", None))
2874
shape = getattr(attrs, "shape", None)
2875
2876
if shape is not None:
2877
# length 0 axis
2878
ret = np.empty(shape, dtype=dtype)
2879
else:
2880
ret = node[start:stop]
2881
2882
if dtype == "datetime64":
2883
# reconstruct a timezone if indicated
2884
tz = getattr(attrs, "tz", None)
2885
ret = _set_tz(ret, tz, coerce=True)
2886
2887
elif dtype == "timedelta64":
2888
ret = np.asarray(ret, dtype="m8[ns]")
2889
2890
if transposed:
2891
return ret.T
2892
else:
2893
return ret
2894
2895
def read_index(
2896
self, key: str, start: int | None = None, stop: int | None = None
2897
) -> Index:
2898
variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety"))
2899
2900
if variety == "multi":
2901
return self.read_multi_index(key, start=start, stop=stop)
2902
elif variety == "regular":
2903
node = getattr(self.group, key)
2904
index = self.read_index_node(node, start=start, stop=stop)
2905
return index
2906
else: # pragma: no cover
2907
raise TypeError(f"unrecognized index variety: {variety}")
2908
2909
def write_index(self, key: str, index: Index):
2910
if isinstance(index, MultiIndex):
2911
setattr(self.attrs, f"{key}_variety", "multi")
2912
self.write_multi_index(key, index)
2913
else:
2914
setattr(self.attrs, f"{key}_variety", "regular")
2915
converted = _convert_index("index", index, self.encoding, self.errors)
2916
2917
self.write_array(key, converted.values)
2918
2919
node = getattr(self.group, key)
2920
node._v_attrs.kind = converted.kind
2921
node._v_attrs.name = index.name
2922
2923
if isinstance(index, (DatetimeIndex, PeriodIndex)):
2924
node._v_attrs.index_class = self._class_to_alias(type(index))
2925
2926
if isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
2927
node._v_attrs.freq = index.freq
2928
2929
if isinstance(index, DatetimeIndex) and index.tz is not None:
2930
node._v_attrs.tz = _get_tz(index.tz)
2931
2932
def write_multi_index(self, key: str, index: MultiIndex):
2933
setattr(self.attrs, f"{key}_nlevels", index.nlevels)
2934
2935
for i, (lev, level_codes, name) in enumerate(
2936
zip(index.levels, index.codes, index.names)
2937
):
2938
# write the level
2939
if is_extension_array_dtype(lev):
2940
raise NotImplementedError(
2941
"Saving a MultiIndex with an extension dtype is not supported."
2942
)
2943
level_key = f"{key}_level{i}"
2944
conv_level = _convert_index(level_key, lev, self.encoding, self.errors)
2945
self.write_array(level_key, conv_level.values)
2946
node = getattr(self.group, level_key)
2947
node._v_attrs.kind = conv_level.kind
2948
node._v_attrs.name = name
2949
2950
# write the name
2951
setattr(node._v_attrs, f"{key}_name{name}", name)
2952
2953
# write the labels
2954
label_key = f"{key}_label{i}"
2955
self.write_array(label_key, level_codes)
2956
2957
def read_multi_index(
2958
self, key: str, start: int | None = None, stop: int | None = None
2959
) -> MultiIndex:
2960
nlevels = getattr(self.attrs, f"{key}_nlevels")
2961
2962
levels = []
2963
codes = []
2964
names: list[Hashable] = []
2965
for i in range(nlevels):
2966
level_key = f"{key}_level{i}"
2967
node = getattr(self.group, level_key)
2968
lev = self.read_index_node(node, start=start, stop=stop)
2969
levels.append(lev)
2970
names.append(lev.name)
2971
2972
label_key = f"{key}_label{i}"
2973
level_codes = self.read_array(label_key, start=start, stop=stop)
2974
codes.append(level_codes)
2975
2976
return MultiIndex(
2977
levels=levels, codes=codes, names=names, verify_integrity=True
2978
)
2979
2980
def read_index_node(
2981
self, node: Node, start: int | None = None, stop: int | None = None
2982
) -> Index:
2983
data = node[start:stop]
2984
# If the index was an empty array write_array_empty() will
2985
# have written a sentinel. Here we replace it with the original.
2986
if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0:
2987
data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type)
2988
kind = _ensure_decoded(node._v_attrs.kind)
2989
name = None
2990
2991
if "name" in node._v_attrs:
2992
name = _ensure_str(node._v_attrs.name)
2993
name = _ensure_decoded(name)
2994
2995
attrs = node._v_attrs
2996
factory, kwargs = self._get_index_factory(attrs)
2997
2998
if kind == "date":
2999
index = factory(
3000
_unconvert_index(
3001
data, kind, encoding=self.encoding, errors=self.errors
3002
),
3003
dtype=object,
3004
**kwargs,
3005
)
3006
else:
3007
index = factory(
3008
_unconvert_index(
3009
data, kind, encoding=self.encoding, errors=self.errors
3010
),
3011
**kwargs,
3012
)
3013
3014
index.name = name
3015
3016
return index
3017
3018
def write_array_empty(self, key: str, value: ArrayLike):
3019
"""write a 0-len array"""
3020
# ugly hack for length 0 axes
3021
arr = np.empty((1,) * value.ndim)
3022
self._handle.create_array(self.group, key, arr)
3023
node = getattr(self.group, key)
3024
node._v_attrs.value_type = str(value.dtype)
3025
node._v_attrs.shape = value.shape
3026
3027
def write_array(
3028
self, key: str, obj: DataFrame | Series, items: Index | None = None
3029
) -> None:
3030
# TODO: we only have a few tests that get here, the only EA
3031
# that gets passed is DatetimeArray, and we never have
3032
# both self._filters and EA
3033
3034
value = extract_array(obj, extract_numpy=True)
3035
3036
if key in self.group:
3037
self._handle.remove_node(self.group, key)
3038
3039
# Transform needed to interface with pytables row/col notation
3040
empty_array = value.size == 0
3041
transposed = False
3042
3043
if is_categorical_dtype(value.dtype):
3044
raise NotImplementedError(
3045
"Cannot store a category dtype in a HDF5 dataset that uses format="
3046
'"fixed". Use format="table".'
3047
)
3048
if not empty_array:
3049
if hasattr(value, "T"):
3050
# ExtensionArrays (1d) may not have transpose.
3051
value = value.T
3052
transposed = True
3053
3054
atom = None
3055
if self._filters is not None:
3056
with suppress(ValueError):
3057
# get the atom for this datatype
3058
atom = _tables().Atom.from_dtype(value.dtype)
3059
3060
if atom is not None:
3061
# We only get here if self._filters is non-None and
3062
# the Atom.from_dtype call succeeded
3063
3064
# create an empty chunked array and fill it from value
3065
if not empty_array:
3066
ca = self._handle.create_carray(
3067
self.group, key, atom, value.shape, filters=self._filters
3068
)
3069
ca[:] = value
3070
3071
else:
3072
self.write_array_empty(key, value)
3073
3074
elif value.dtype.type == np.object_:
3075
# infer the type, warn if we have a non-string type here (for
3076
# performance)
3077
inferred_type = lib.infer_dtype(value, skipna=False)
3078
if empty_array:
3079
pass
3080
elif inferred_type == "string":
3081
pass
3082
else:
3083
ws = performance_doc % (inferred_type, key, items)
3084
warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level())
3085
3086
vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom())
3087
vlarr.append(value)
3088
3089
elif is_datetime64_dtype(value.dtype):
3090
self._handle.create_array(self.group, key, value.view("i8"))
3091
getattr(self.group, key)._v_attrs.value_type = "datetime64"
3092
elif is_datetime64tz_dtype(value.dtype):
3093
# store as UTC
3094
# with a zone
3095
3096
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
3097
# attribute "asi8"
3098
self._handle.create_array(
3099
self.group, key, value.asi8 # type: ignore[union-attr]
3100
)
3101
3102
node = getattr(self.group, key)
3103
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
3104
# attribute "tz"
3105
node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr]
3106
node._v_attrs.value_type = "datetime64"
3107
elif is_timedelta64_dtype(value.dtype):
3108
self._handle.create_array(self.group, key, value.view("i8"))
3109
getattr(self.group, key)._v_attrs.value_type = "timedelta64"
3110
elif empty_array:
3111
self.write_array_empty(key, value)
3112
else:
3113
self._handle.create_array(self.group, key, value)
3114
3115
getattr(self.group, key)._v_attrs.transposed = transposed
3116
3117
3118
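
# Editor's note: illustrative sketch, not part of pandas.  ``validate_read``
# above is why a fixed-format storer must be read back in its entirety:
# passing ``where`` or ``columns`` raises TypeError, while a table-format
# storer accepts both.  The helper name and "demo.h5" are made up.
def _example_fixed_vs_table(path: str = "demo.h5") -> None:
    import pandas as pd

    df = pd.DataFrame({"a": range(3), "b": range(3)})
    with pd.HDFStore(path, mode="w") as store:
        store.put("fixed_df", df, format="fixed")
        store.put("table_df", df, format="table")

        store.select("table_df", columns=["a"])  # fine: table format
        try:
            store.select("fixed_df", columns=["a"])  # TypeError from validate_read
        except TypeError as err:
            print(err)

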
class SeriesFixed(GenericFixed):
3119
pandas_kind = "series"
3120
attributes = ["name"]
3121
3122
name: Hashable
3123
3124
@property
3125
def shape(self):
3126
try:
3127
return (len(self.group.values),)
3128
except (TypeError, AttributeError):
3129
return None
3130
3131
def read(
3132
self,
3133
where=None,
3134
columns=None,
3135
start: int | None = None,
3136
stop: int | None = None,
3137
):
3138
self.validate_read(columns, where)
3139
index = self.read_index("index", start=start, stop=stop)
3140
values = self.read_array("values", start=start, stop=stop)
3141
return Series(values, index=index, name=self.name)
3142
3143
def write(self, obj, **kwargs):
3144
super().write(obj, **kwargs)
3145
self.write_index("index", obj.index)
3146
self.write_array("values", obj)
3147
self.attrs.name = obj.name
3148
3149
3150
class BlockManagerFixed(GenericFixed):
3151
attributes = ["ndim", "nblocks"]
3152
3153
nblocks: int
3154
3155
@property
3156
def shape(self) -> Shape | None:
3157
try:
3158
ndim = self.ndim
3159
3160
# items
3161
items = 0
3162
for i in range(self.nblocks):
3163
node = getattr(self.group, f"block{i}_items")
3164
shape = getattr(node, "shape", None)
3165
if shape is not None:
3166
items += shape[0]
3167
3168
# data shape
3169
node = self.group.block0_values
3170
shape = getattr(node, "shape", None)
3171
if shape is not None:
3172
shape = list(shape[0 : (ndim - 1)])
3173
else:
3174
shape = []
3175
3176
shape.append(items)
3177
3178
return shape
3179
except AttributeError:
3180
return None
3181
3182
def read(
3183
self,
3184
where=None,
3185
columns=None,
3186
start: int | None = None,
3187
stop: int | None = None,
3188
):
3189
# start, stop applied to rows, so 0th axis only
3190
self.validate_read(columns, where)
3191
select_axis = self.obj_type()._get_block_manager_axis(0)
3192
3193
axes = []
3194
for i in range(self.ndim):
3195
3196
_start, _stop = (start, stop) if i == select_axis else (None, None)
3197
ax = self.read_index(f"axis{i}", start=_start, stop=_stop)
3198
axes.append(ax)
3199
3200
items = axes[0]
3201
dfs = []
3202
3203
for i in range(self.nblocks):
3204
3205
blk_items = self.read_index(f"block{i}_items")
3206
values = self.read_array(f"block{i}_values", start=_start, stop=_stop)
3207
3208
columns = items[items.get_indexer(blk_items)]
3209
df = DataFrame(values.T, columns=columns, index=axes[1])
3210
dfs.append(df)
3211
3212
if len(dfs) > 0:
3213
out = concat(dfs, axis=1)
3214
out = out.reindex(columns=items, copy=False)
3215
return out
3216
3217
return DataFrame(columns=axes[0], index=axes[1])
3218
3219
def write(self, obj, **kwargs):
3220
super().write(obj, **kwargs)
3221
3222
# TODO(ArrayManager) HDFStore relies on accessing the blocks
3223
if isinstance(obj._mgr, ArrayManager):
3224
obj = obj._as_manager("block")
3225
3226
data = obj._mgr
3227
if not data.is_consolidated():
3228
data = data.consolidate()
3229
3230
self.attrs.ndim = data.ndim
3231
for i, ax in enumerate(data.axes):
3232
if i == 0 and (not ax.is_unique):
3233
raise ValueError("Columns index has to be unique for fixed format")
3234
self.write_index(f"axis{i}", ax)
3235
3236
# Supporting mixed-type DataFrame objects...nontrivial
3237
self.attrs.nblocks = len(data.blocks)
3238
for i, blk in enumerate(data.blocks):
3239
# I have no idea why, but writing values before items fixed #2299
3240
blk_items = data.items.take(blk.mgr_locs)
3241
self.write_array(f"block{i}_values", blk.values, items=blk_items)
3242
self.write_index(f"block{i}_items", blk_items)
3243
3244
3245
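
# Editor's note: illustrative sketch, not part of pandas.  The uniqueness
# check in ``BlockManagerFixed.write`` above means a frame with duplicate
# column labels cannot be stored in fixed format.  The helper name and
# "demo.h5" are made up.
def _example_unique_columns(path: str = "demo.h5") -> None:
    import pandas as pd

    df = pd.DataFrame([[1, 2]], columns=["a", "a"])  # duplicate labels
    with pd.HDFStore(path, mode="w") as store:
        try:
            store.put("df", df, format="fixed")
        except ValueError as err:
            # "Columns index has to be unique for fixed format"
            print(err)

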
class FrameFixed(BlockManagerFixed):
3246
pandas_kind = "frame"
3247
obj_type = DataFrame
3248
3249
3250
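
# Editor's note: illustrative sketch, not part of pandas.  ``Table.create_index``
# below is exposed publicly as ``HDFStore.create_table_index``; it builds a
# PyTables index on indexable/data columns so that ``where`` filtering can use
# it.  The helper name and "demo.h5" are made up.
def _example_create_table_index(path: str = "demo.h5") -> None:
    import pandas as pd

    df = pd.DataFrame({"a": range(100), "b": range(100)})
    with pd.HDFStore(path, mode="w") as store:
        # index=False skips index creation on append; build it afterwards
        store.append("df", df, data_columns=["a"], index=False)
        store.create_table_index("df", columns=["a"], optlevel=9, kind="full")
        print(store.select("df", where="a > 90"))

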
class Table(Fixed):
3251
"""
3252
represent a table:
3253
facilitate read/write of various types of tables
3254
3255
Attrs in Table Node
3256
-------------------
3257
    These are attributes that are stored in the main table node; they are
3258
necessary to recreate these tables when read back in.
3259
3260
index_axes : a list of tuples of the (original indexing axis and
3261
index column)
3262
non_index_axes: a list of tuples of the (original index axis and
3263
columns on a non-indexing axis)
3264
values_axes : a list of the columns which comprise the data of this
3265
table
3266
data_columns : a list of the columns that we are allowing indexing
3267
(these become single columns in values_axes)
3268
nan_rep : the string to use for nan representations for string
3269
objects
3270
levels : the names of levels
3271
metadata : the names of the metadata columns
3272
"""
3273
3274
pandas_kind = "wide_table"
3275
format_type: str = "table" # GH#30962 needed by dask
3276
table_type: str
3277
levels: int | list[Hashable] = 1
3278
is_table = True
3279
3280
index_axes: list[IndexCol]
3281
non_index_axes: list[tuple[int, Any]]
3282
values_axes: list[DataCol]
3283
data_columns: list
3284
metadata: list
3285
info: dict
3286
3287
def __init__(
3288
self,
3289
parent: HDFStore,
3290
group: Node,
3291
encoding=None,
3292
errors: str = "strict",
3293
index_axes=None,
3294
non_index_axes=None,
3295
values_axes=None,
3296
data_columns=None,
3297
info=None,
3298
nan_rep=None,
3299
):
3300
super().__init__(parent, group, encoding=encoding, errors=errors)
3301
self.index_axes = index_axes or []
3302
self.non_index_axes = non_index_axes or []
3303
self.values_axes = values_axes or []
3304
self.data_columns = data_columns or []
3305
self.info = info or {}
3306
self.nan_rep = nan_rep
3307
3308
@property
3309
def table_type_short(self) -> str:
3310
return self.table_type.split("_")[0]
3311
3312
def __repr__(self) -> str:
3313
"""return a pretty representation of myself"""
3314
self.infer_axes()
3315
jdc = ",".join(self.data_columns) if len(self.data_columns) else ""
3316
dc = f",dc->[{jdc}]"
3317
3318
ver = ""
3319
if self.is_old_version:
3320
jver = ".".join([str(x) for x in self.version])
3321
ver = f"[{jver}]"
3322
3323
jindex_axes = ",".join([a.name for a in self.index_axes])
3324
return (
3325
f"{self.pandas_type:12.12}{ver} "
3326
f"(typ->{self.table_type_short},nrows->{self.nrows},"
3327
f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})"
3328
)
3329
3330
def __getitem__(self, c: str):
3331
"""return the axis for c"""
3332
for a in self.axes:
3333
if c == a.name:
3334
return a
3335
return None
3336
3337
def validate(self, other):
3338
"""validate against an existing table"""
3339
if other is None:
3340
return
3341
3342
if other.table_type != self.table_type:
3343
raise TypeError(
3344
"incompatible table_type with existing "
3345
f"[{other.table_type} - {self.table_type}]"
3346
)
3347
3348
for c in ["index_axes", "non_index_axes", "values_axes"]:
3349
sv = getattr(self, c, None)
3350
ov = getattr(other, c, None)
3351
if sv != ov:
3352
3353
# show the error for the specific axes
3354
# Argument 1 to "enumerate" has incompatible type
3355
# "Optional[Any]"; expected "Iterable[Any]" [arg-type]
3356
for i, sax in enumerate(sv): # type: ignore[arg-type]
3357
# Value of type "Optional[Any]" is not indexable [index]
3358
oax = ov[i] # type: ignore[index]
3359
if sax != oax:
3360
raise ValueError(
3361
f"invalid combination of [{c}] on appending data "
3362
f"[{sax}] vs current table [{oax}]"
3363
)
3364
3365
# should never get here
3366
raise Exception(
3367
f"invalid combination of [{c}] on appending data [{sv}] vs "
3368
f"current table [{ov}]"
3369
)
3370
3371
@property
3372
def is_multi_index(self) -> bool:
3373
"""the levels attribute is 1 or a list in the case of a multi-index"""
3374
return isinstance(self.levels, list)
3375
3376
def validate_multiindex(
3377
self, obj: DataFrame | Series
3378
) -> tuple[DataFrame, list[Hashable]]:
3379
"""
3380
validate that we can store the multi-index; reset and return the
3381
new object
3382
"""
3383
levels = com.fill_missing_names(obj.index.names)
3384
try:
3385
reset_obj = obj.reset_index()
3386
except ValueError as err:
3387
raise ValueError(
3388
"duplicate names/columns in the multi-index when storing as a table"
3389
) from err
3390
assert isinstance(reset_obj, DataFrame) # for mypy
3391
return reset_obj, levels
3392
3393
@property
3394
def nrows_expected(self) -> int:
3395
"""based on our axes, compute the expected nrows"""
3396
return np.prod([i.cvalues.shape[0] for i in self.index_axes])
3397
3398
@property
3399
def is_exists(self) -> bool:
3400
"""has this table been created"""
3401
return "table" in self.group
3402
3403
@property
3404
def storable(self):
3405
return getattr(self.group, "table", None)
3406
3407
@property
3408
def table(self):
3409
"""return the table group (this is my storable)"""
3410
return self.storable
3411
3412
@property
3413
def dtype(self):
3414
return self.table.dtype
3415
3416
@property
3417
def description(self):
3418
return self.table.description
3419
3420
@property
3421
def axes(self):
3422
return itertools.chain(self.index_axes, self.values_axes)
3423
3424
@property
3425
def ncols(self) -> int:
3426
"""the number of total columns in the values axes"""
3427
return sum(len(a.values) for a in self.values_axes)
3428
3429
@property
3430
def is_transposed(self) -> bool:
3431
return False
3432
3433
@property
3434
def data_orientation(self):
3435
"""return a tuple of my permutated axes, non_indexable at the front"""
3436
return tuple(
3437
itertools.chain(
3438
[int(a[0]) for a in self.non_index_axes],
3439
[int(a.axis) for a in self.index_axes],
3440
)
3441
)
3442
3443
def queryables(self) -> dict[str, Any]:
3444
"""return a dict of the kinds allowable columns for this object"""
3445
# mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here
3446
axis_names = {0: "index", 1: "columns"}
3447
3448
# compute the values_axes queryables
3449
d1 = [(a.cname, a) for a in self.index_axes]
3450
d2 = [(axis_names[axis], None) for axis, values in self.non_index_axes]
3451
d3 = [
3452
(v.cname, v) for v in self.values_axes if v.name in set(self.data_columns)
3453
]
3454
3455
# error: Unsupported operand types for + ("List[Tuple[str, IndexCol]]" and
3456
# "List[Tuple[str, None]]")
3457
return dict(d1 + d2 + d3) # type: ignore[operator]
3458
3459
def index_cols(self):
3460
"""return a list of my index cols"""
3461
# Note: each `i.cname` below is assured to be a str.
3462
return [(i.axis, i.cname) for i in self.index_axes]
3463
3464
def values_cols(self) -> list[str]:
3465
"""return a list of my values cols"""
3466
return [i.cname for i in self.values_axes]
3467
3468
def _get_metadata_path(self, key: str) -> str:
3469
"""return the metadata pathname for this key"""
3470
group = self.group._v_pathname
3471
return f"{group}/meta/{key}/meta"
3472
3473
def write_metadata(self, key: str, values: np.ndarray):
3474
"""
3475
        Write out a metadata array to the key as a table-format Series.
3476
3477
Parameters
3478
----------
3479
key : str
3480
values : ndarray
3481
"""
3482
self.parent.put(
3483
self._get_metadata_path(key),
3484
Series(values),
3485
format="table",
3486
encoding=self.encoding,
3487
errors=self.errors,
3488
nan_rep=self.nan_rep,
3489
)
3490
3491
def read_metadata(self, key: str):
3492
"""return the meta data array for this key"""
3493
if getattr(getattr(self.group, "meta", None), key, None) is not None:
3494
return self.parent.select(self._get_metadata_path(key))
3495
return None
3496
3497
def set_attrs(self):
3498
"""set our table type & indexables"""
3499
self.attrs.table_type = str(self.table_type)
3500
self.attrs.index_cols = self.index_cols()
3501
self.attrs.values_cols = self.values_cols()
3502
self.attrs.non_index_axes = self.non_index_axes
3503
self.attrs.data_columns = self.data_columns
3504
self.attrs.nan_rep = self.nan_rep
3505
self.attrs.encoding = self.encoding
3506
self.attrs.errors = self.errors
3507
self.attrs.levels = self.levels
3508
self.attrs.info = self.info
3509
3510
def get_attrs(self):
3511
"""retrieve our attributes"""
3512
self.non_index_axes = getattr(self.attrs, "non_index_axes", None) or []
3513
self.data_columns = getattr(self.attrs, "data_columns", None) or []
3514
self.info = getattr(self.attrs, "info", None) or {}
3515
self.nan_rep = getattr(self.attrs, "nan_rep", None)
3516
self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None))
3517
self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict"))
3518
self.levels: list[Hashable] = getattr(self.attrs, "levels", None) or []
3519
self.index_axes = [a for a in self.indexables if a.is_an_indexable]
3520
self.values_axes = [a for a in self.indexables if not a.is_an_indexable]
3521
3522
def validate_version(self, where=None):
3523
"""are we trying to operate on an old version?"""
3524
if where is not None:
3525
if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1:
3526
ws = incompatibility_doc % ".".join([str(x) for x in self.version])
3527
warnings.warn(ws, IncompatibilityWarning)
3528
3529
def validate_min_itemsize(self, min_itemsize):
3530
"""
3531
        validate that min_itemsize does not contain items that are not in the
3532
        axes; this needs data_columns to be defined
3533
"""
3534
if min_itemsize is None:
3535
return
3536
if not isinstance(min_itemsize, dict):
3537
return
3538
3539
q = self.queryables()
3540
for k in min_itemsize:
3541
3542
# ok, apply generally
3543
if k == "values":
3544
continue
3545
if k not in q:
3546
raise ValueError(
3547
f"min_itemsize has the key [{k}] which is not an axis or "
3548
"data_column"
3549
)
3550
3551
@cache_readonly
3552
def indexables(self):
3553
"""create/cache the indexables if they don't exist"""
3554
_indexables = []
3555
3556
desc = self.description
3557
table_attrs = self.table.attrs
3558
3559
# Note: each of the `name` kwargs below are str, ensured
3560
# by the definition in index_cols.
3561
# index columns
3562
for i, (axis, name) in enumerate(self.attrs.index_cols):
3563
atom = getattr(desc, name)
3564
md = self.read_metadata(name)
3565
meta = "category" if md is not None else None
3566
3567
kind_attr = f"{name}_kind"
3568
kind = getattr(table_attrs, kind_attr, None)
3569
3570
index_col = IndexCol(
3571
name=name,
3572
axis=axis,
3573
pos=i,
3574
kind=kind,
3575
typ=atom,
3576
table=self.table,
3577
meta=meta,
3578
metadata=md,
3579
)
3580
_indexables.append(index_col)
3581
3582
# values columns
3583
dc = set(self.data_columns)
3584
base_pos = len(_indexables)
3585
3586
def f(i, c):
3587
assert isinstance(c, str)
3588
klass = DataCol
3589
if c in dc:
3590
klass = DataIndexableCol
3591
3592
atom = getattr(desc, c)
3593
adj_name = _maybe_adjust_name(c, self.version)
3594
3595
# TODO: why kind_attr here?
3596
values = getattr(table_attrs, f"{adj_name}_kind", None)
3597
dtype = getattr(table_attrs, f"{adj_name}_dtype", None)
3598
# Argument 1 to "_dtype_to_kind" has incompatible type
3599
# "Optional[Any]"; expected "str" [arg-type]
3600
kind = _dtype_to_kind(dtype) # type: ignore[arg-type]
3601
3602
md = self.read_metadata(c)
3603
            # TODO: figure out why these two versions of `meta` don't always match.
3604
# meta = "category" if md is not None else None
3605
meta = getattr(table_attrs, f"{adj_name}_meta", None)
3606
3607
obj = klass(
3608
name=adj_name,
3609
cname=c,
3610
values=values,
3611
kind=kind,
3612
pos=base_pos + i,
3613
typ=atom,
3614
table=self.table,
3615
meta=meta,
3616
metadata=md,
3617
dtype=dtype,
3618
)
3619
return obj
3620
3621
# Note: the definition of `values_cols` ensures that each
3622
# `c` below is a str.
3623
_indexables.extend([f(i, c) for i, c in enumerate(self.attrs.values_cols)])
3624
3625
return _indexables
3626
3627
def create_index(self, columns=None, optlevel=None, kind: str | None = None):
3628
"""
3629
Create a pytables index on the specified columns.
3630
3631
Parameters
3632
----------
3633
columns : None, bool, or listlike[str]
3634
Indicate which columns to create an index on.
3635
3636
* False : Do not create any indexes.
3637
* True : Create indexes on all columns.
3638
* None : Create indexes on all columns.
3639
* listlike : Create indexes on the given columns.
3640
3641
optlevel : int or None, default None
3642
Optimization level, if None, pytables defaults to 6.
3643
kind : str or None, default None
3644
Kind of index, if None, pytables defaults to "medium".
3645
3646
Raises
3647
------
3648
TypeError if trying to create an index on a complex-type column.
3649
3650
Notes
3651
-----
3652
Cannot index Time64Col or ComplexCol.
3653
Pytables must be >= 3.0.
3654
"""
3655
if not self.infer_axes():
3656
return
3657
if columns is False:
3658
return
3659
3660
# index all indexables and data_columns
3661
if columns is None or columns is True:
3662
columns = [a.cname for a in self.axes if a.is_data_indexable]
3663
if not isinstance(columns, (tuple, list)):
3664
columns = [columns]
3665
3666
kw = {}
3667
if optlevel is not None:
3668
kw["optlevel"] = optlevel
3669
if kind is not None:
3670
kw["kind"] = kind
3671
3672
table = self.table
3673
for c in columns:
3674
v = getattr(table.cols, c, None)
3675
if v is not None:
3676
# remove the index if the kind/optlevel have changed
3677
if v.is_indexed:
3678
index = v.index
3679
cur_optlevel = index.optlevel
3680
cur_kind = index.kind
3681
3682
if kind is not None and cur_kind != kind:
3683
v.remove_index()
3684
else:
3685
kw["kind"] = cur_kind
3686
3687
if optlevel is not None and cur_optlevel != optlevel:
3688
v.remove_index()
3689
else:
3690
kw["optlevel"] = cur_optlevel
3691
3692
# create the index
3693
if not v.is_indexed:
3694
if v.type.startswith("complex"):
3695
raise TypeError(
3696
"Columns containing complex values can be stored but "
3697
"cannot be indexed when using table format. Either use "
3698
"fixed format, set index=False, or do not include "
3699
"the columns containing complex values to "
3700
"data_columns when initializing the table."
3701
)
3702
v.create_index(**kw)
3703
elif c in self.non_index_axes[0][1]:
3704
# GH 28156
3705
raise AttributeError(
3706
f"column {c} is not a data_column.\n"
3707
f"In order to read column {c} you must reload the dataframe \n"
3708
f"into HDFStore and include {c} with the data_columns argument."
3709
)
3710
3711
def _read_axes(
3712
self, where, start: int | None = None, stop: int | None = None
3713
) -> list[tuple[ArrayLike, ArrayLike]]:
3714
"""
3715
Create the axes sniffed from the table.
3716
3717
Parameters
3718
----------
3719
where : ???
3720
start : int or None, default None
3721
stop : int or None, default None
3722
3723
Returns
3724
-------
3725
List[Tuple[index_values, column_values]]
3726
"""
3727
# create the selection
3728
selection = Selection(self, where=where, start=start, stop=stop)
3729
values = selection.select()
3730
3731
results = []
3732
# convert the data
3733
for a in self.axes:
3734
a.set_info(self.info)
3735
res = a.convert(
3736
values,
3737
nan_rep=self.nan_rep,
3738
encoding=self.encoding,
3739
errors=self.errors,
3740
)
3741
results.append(res)
3742
3743
return results
3744
3745
@classmethod
3746
def get_object(cls, obj, transposed: bool):
3747
"""return the data for this obj"""
3748
return obj
3749
3750
def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):
3751
"""
3752
        take the input data_columns and min_itemsize and create a data
3753
columns spec
3754
"""
3755
if not len(non_index_axes):
3756
return []
3757
3758
axis, axis_labels = non_index_axes[0]
3759
info = self.info.get(axis, {})
3760
if info.get("type") == "MultiIndex" and data_columns:
3761
raise ValueError(
3762
f"cannot use a multi-index on axis [{axis}] with "
3763
f"data_columns {data_columns}"
3764
)
3765
3766
# evaluate the passed data_columns, True == use all columns
3767
# take only valid axis labels
3768
if data_columns is True:
3769
data_columns = list(axis_labels)
3770
elif data_columns is None:
3771
data_columns = []
3772
3773
# if min_itemsize is a dict, add the keys (exclude 'values')
3774
if isinstance(min_itemsize, dict):
3775
existing_data_columns = set(data_columns)
3776
data_columns = list(data_columns) # ensure we do not modify
3777
data_columns.extend(
3778
[
3779
k
3780
for k in min_itemsize.keys()
3781
if k != "values" and k not in existing_data_columns
3782
]
3783
)
3784
3785
# return valid columns in the order of our axis
3786
return [c for c in data_columns if c in axis_labels]
3787
3788
def _create_axes(
3789
self,
3790
axes,
3791
obj: DataFrame,
3792
validate: bool = True,
3793
nan_rep=None,
3794
data_columns=None,
3795
min_itemsize=None,
3796
):
3797
"""
3798
Create and return the axes.
3799
3800
Parameters
3801
----------
3802
axes: list or None
3803
The names or numbers of the axes to create.
3804
obj : DataFrame
3805
The object to create axes on.
3806
validate: bool, default True
3807
Whether to validate the obj against an existing object already written.
3808
nan_rep :
3809
A value to use for string column nan_rep.
3810
data_columns : List[str], True, or None, default None
3811
Specify the columns that we want to create to allow indexing on.
3812
3813
* True : Use all available columns.
3814
* None : Use no columns.
3815
* List[str] : Use the specified columns.
3816
3817
min_itemsize: Dict[str, int] or None, default None
3818
The min itemsize for a column in bytes.
3819
"""
3820
if not isinstance(obj, DataFrame):
3821
group = self.group._v_name
3822
raise TypeError(
3823
f"cannot properly create the storer for: [group->{group},"
3824
f"value->{type(obj)}]"
3825
)
3826
3827
# set the default axes if needed
3828
if axes is None:
3829
axes = [0]
3830
3831
# map axes to numbers
3832
axes = [obj._get_axis_number(a) for a in axes]
3833
3834
# do we have an existing table (if so, use its axes & data_columns)
3835
if self.infer_axes():
3836
table_exists = True
3837
axes = [a.axis for a in self.index_axes]
3838
data_columns = list(self.data_columns)
3839
nan_rep = self.nan_rep
3840
# TODO: do we always have validate=True here?
3841
else:
3842
table_exists = False
3843
3844
new_info = self.info
3845
3846
assert self.ndim == 2 # with next check, we must have len(axes) == 1
3847
        # currently support only ndim-1 axes
3848
if len(axes) != self.ndim - 1:
3849
raise ValueError(
3850
"currently only support ndim-1 indexers in an AppendableTable"
3851
)
3852
3853
# create according to the new data
3854
new_non_index_axes: list = []
3855
3856
# nan_representation
3857
if nan_rep is None:
3858
nan_rep = "nan"
3859
3860
# We construct the non-index-axis first, since that alters new_info
3861
idx = [x for x in [0, 1] if x not in axes][0]
3862
3863
a = obj.axes[idx]
3864
# we might be able to change the axes on the appending data if necessary
3865
append_axis = list(a)
3866
if table_exists:
3867
indexer = len(new_non_index_axes) # i.e. 0
3868
exist_axis = self.non_index_axes[indexer][1]
3869
if not array_equivalent(np.array(append_axis), np.array(exist_axis)):
3870
3871
# ahah! -> reindex
3872
if array_equivalent(
3873
np.array(sorted(append_axis)), np.array(sorted(exist_axis))
3874
):
3875
append_axis = exist_axis
3876
3877
# the non_index_axes info
3878
info = new_info.setdefault(idx, {})
3879
info["names"] = list(a.names)
3880
info["type"] = type(a).__name__
3881
3882
new_non_index_axes.append((idx, append_axis))
3883
3884
# Now we can construct our new index axis
3885
idx = axes[0]
3886
a = obj.axes[idx]
3887
axis_name = obj._get_axis_name(idx)
3888
new_index = _convert_index(axis_name, a, self.encoding, self.errors)
3889
new_index.axis = idx
3890
3891
# Because we are always 2D, there is only one new_index, so
3892
# we know it will have pos=0
3893
new_index.set_pos(0)
3894
new_index.update_info(new_info)
3895
new_index.maybe_set_size(min_itemsize) # check for column conflicts
3896
3897
new_index_axes = [new_index]
3898
j = len(new_index_axes) # i.e. 1
3899
assert j == 1
3900
3901
# reindex by our non_index_axes & compute data_columns
3902
assert len(new_non_index_axes) == 1
3903
for a in new_non_index_axes:
3904
obj = _reindex_axis(obj, a[0], a[1])
3905
3906
transposed = new_index.axis == 1
3907
3908
# figure out data_columns and get out blocks
3909
data_columns = self.validate_data_columns(
3910
data_columns, min_itemsize, new_non_index_axes
3911
)
3912
3913
frame = self.get_object(obj, transposed)._consolidate()
3914
3915
blocks, blk_items = self._get_blocks_and_items(
3916
frame, table_exists, new_non_index_axes, self.values_axes, data_columns
3917
)
3918
3919
# add my values
3920
vaxes = []
3921
for i, (blk, b_items) in enumerate(zip(blocks, blk_items)):
3922
3923
# shape of the data column are the indexable axes
3924
klass = DataCol
3925
name = None
3926
3927
# we have a data_column
3928
if data_columns and len(b_items) == 1 and b_items[0] in data_columns:
3929
klass = DataIndexableCol
3930
name = b_items[0]
3931
if not (name is None or isinstance(name, str)):
3932
# TODO: should the message here be more specifically non-str?
3933
raise ValueError("cannot have non-object label DataIndexableCol")
3934
3935
# make sure that we match up the existing columns
3936
# if we have an existing table
3937
existing_col: DataCol | None
3938
3939
if table_exists and validate:
3940
try:
3941
existing_col = self.values_axes[i]
3942
except (IndexError, KeyError) as err:
3943
raise ValueError(
3944
f"Incompatible appended table [{blocks}]"
3945
f"with existing table [{self.values_axes}]"
3946
) from err
3947
else:
3948
existing_col = None
3949
3950
new_name = name or f"values_block_{i}"
3951
data_converted = _maybe_convert_for_string_atom(
3952
new_name,
3953
blk.values,
3954
existing_col=existing_col,
3955
min_itemsize=min_itemsize,
3956
nan_rep=nan_rep,
3957
encoding=self.encoding,
3958
errors=self.errors,
3959
columns=b_items,
3960
)
3961
adj_name = _maybe_adjust_name(new_name, self.version)
3962
3963
typ = klass._get_atom(data_converted)
3964
kind = _dtype_to_kind(data_converted.dtype.name)
3965
tz = None
3966
if getattr(data_converted, "tz", None) is not None:
3967
tz = _get_tz(data_converted.tz)
3968
3969
meta = metadata = ordered = None
3970
if is_categorical_dtype(data_converted.dtype):
3971
ordered = data_converted.ordered
3972
meta = "category"
3973
metadata = np.array(data_converted.categories, copy=False).ravel()
3974
3975
data, dtype_name = _get_data_and_dtype_name(data_converted)
3976
3977
col = klass(
3978
name=adj_name,
3979
cname=new_name,
3980
values=list(b_items),
3981
typ=typ,
3982
pos=j,
3983
kind=kind,
3984
tz=tz,
3985
ordered=ordered,
3986
meta=meta,
3987
metadata=metadata,
3988
dtype=dtype_name,
3989
data=data,
3990
)
3991
col.update_info(new_info)
3992
3993
vaxes.append(col)
3994
3995
j += 1
3996
3997
dcs = [col.name for col in vaxes if col.is_data_indexable]
3998
3999
new_table = type(self)(
4000
parent=self.parent,
4001
group=self.group,
4002
encoding=self.encoding,
4003
errors=self.errors,
4004
index_axes=new_index_axes,
4005
non_index_axes=new_non_index_axes,
4006
values_axes=vaxes,
4007
data_columns=dcs,
4008
info=new_info,
4009
nan_rep=nan_rep,
4010
)
4011
if hasattr(self, "levels"):
4012
# TODO: get this into constructor, only for appropriate subclass
4013
new_table.levels = self.levels
4014
4015
new_table.validate_min_itemsize(min_itemsize)
4016
4017
if validate and table_exists:
4018
new_table.validate(self)
4019
4020
return new_table
4021
4022
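# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``_create_axes`` above is driven by the ``data_columns`` and ``min_itemsize``
# arguments users pass when writing in table format. A minimal sketch; the
# file name "axes_store.h5" is an arbitrary assumption.
def _example_data_columns_and_min_itemsize():
    import pandas as pd

    df = pd.DataFrame({"city": ["NYC", "LA"], "v": [1.0, 2.0]})
    with pd.HDFStore("axes_store.h5", mode="w") as store:
        # data_columns become individually queryable columns; min_itemsize
        # reserves string width so longer values still fit on later appends
        store.append("df", df, data_columns=["city"], min_itemsize={"city": 20})
        store.append("df", pd.DataFrame({"city": ["San Francisco"], "v": [3.0]}))
        return store.select("df", where="city == 'LA'")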
@staticmethod
4023
def _get_blocks_and_items(
4024
frame: DataFrame,
4025
table_exists: bool,
4026
new_non_index_axes,
4027
values_axes,
4028
data_columns,
4029
):
4030
# Helper to clarify non-state-altering parts of _create_axes
4031
4032
# TODO(ArrayManager) HDFStore relies on accessing the blocks
4033
if isinstance(frame._mgr, ArrayManager):
4034
frame = frame._as_manager("block")
4035
4036
def get_blk_items(mgr):
4037
return [mgr.items.take(blk.mgr_locs) for blk in mgr.blocks]
4038
4039
mgr = frame._mgr
4040
mgr = cast(BlockManager, mgr)
4041
blocks: list[Block] = list(mgr.blocks)
4042
blk_items: list[Index] = get_blk_items(mgr)
4043
4044
if len(data_columns):
4045
axis, axis_labels = new_non_index_axes[0]
4046
new_labels = Index(axis_labels).difference(Index(data_columns))
4047
mgr = frame.reindex(new_labels, axis=axis)._mgr
4048
4049
# error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has no
4050
# attribute "blocks"
4051
blocks = list(mgr.blocks) # type: ignore[union-attr]
4052
blk_items = get_blk_items(mgr)
4053
for c in data_columns:
4054
mgr = frame.reindex([c], axis=axis)._mgr
4055
# error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has
4056
# no attribute "blocks"
4057
blocks.extend(mgr.blocks) # type: ignore[union-attr]
4058
blk_items.extend(get_blk_items(mgr))
4059
4060
# reorder the blocks in the same order as the existing table if we can
4061
if table_exists:
4062
by_items = {
4063
tuple(b_items.tolist()): (b, b_items)
4064
for b, b_items in zip(blocks, blk_items)
4065
}
4066
new_blocks: list[Block] = []
4067
new_blk_items = []
4068
for ea in values_axes:
4069
items = tuple(ea.values)
4070
try:
4071
b, b_items = by_items.pop(items)
4072
new_blocks.append(b)
4073
new_blk_items.append(b_items)
4074
except (IndexError, KeyError) as err:
4075
jitems = ",".join([pprint_thing(item) for item in items])
4076
raise ValueError(
4077
f"cannot match existing table structure for [{jitems}] "
4078
"on appending data"
4079
) from err
4080
blocks = new_blocks
4081
blk_items = new_blk_items
4082
4083
return blocks, blk_items
4084
4085
def process_axes(self, obj, selection: Selection, columns=None):
4086
"""process axes filters"""
4087
# make a copy to avoid side effects
4088
if columns is not None:
4089
columns = list(columns)
4090
4091
# make sure to include levels if we have them
4092
if columns is not None and self.is_multi_index:
4093
assert isinstance(self.levels, list) # assured by is_multi_index
4094
for n in self.levels:
4095
if n not in columns:
4096
columns.insert(0, n)
4097
4098
# reorder by any non_index_axes & limit to the select columns
4099
for axis, labels in self.non_index_axes:
4100
obj = _reindex_axis(obj, axis, labels, columns)
4101
4102
# apply the selection filters (but keep in the same order)
4103
if selection.filter is not None:
4104
for field, op, filt in selection.filter.format():
4105
4106
def process_filter(field, filt):
4107
4108
for axis_name in obj._AXIS_ORDERS:
4109
axis_number = obj._get_axis_number(axis_name)
4110
axis_values = obj._get_axis(axis_name)
4111
assert axis_number is not None
4112
4113
# see if the field is the name of an axis
4114
if field == axis_name:
4115
4116
# if we have a multi-index, then need to include
4117
# the levels
4118
if self.is_multi_index:
4119
filt = filt.union(Index(self.levels))
4120
4121
takers = op(axis_values, filt)
4122
return obj.loc(axis=axis_number)[takers]
4123
4124
# this might be the name of a field IN an axis
4125
elif field in axis_values:
4126
4127
# we need to filter on this dimension
4128
values = ensure_index(getattr(obj, field).values)
4129
filt = ensure_index(filt)
4130
4131
# hack until we support reversed dim flags
4132
if isinstance(obj, DataFrame):
4133
axis_number = 1 - axis_number
4134
takers = op(values, filt)
4135
return obj.loc(axis=axis_number)[takers]
4136
4137
raise ValueError(f"cannot find the field [{field}] for filtering!")
4138
4139
obj = process_filter(field, filt)
4140
4141
return obj
4142
4143
def create_description(
4144
self,
4145
complib,
4146
complevel: int | None,
4147
fletcher32: bool,
4148
expectedrows: int | None,
4149
) -> dict[str, Any]:
4150
"""create the description of the table from the axes & values"""
4151
# use the provided expectedrows if it was passed
4152
if expectedrows is None:
4153
expectedrows = max(self.nrows_expected, 10000)
4154
4155
d = {"name": "table", "expectedrows": expectedrows}
4156
4157
# description from the axes & values
4158
d["description"] = {a.cname: a.typ for a in self.axes}
4159
4160
if complib:
4161
if complevel is None:
4162
complevel = self._complevel or 9
4163
filters = _tables().Filters(
4164
complevel=complevel,
4165
complib=complib,
4166
fletcher32=fletcher32 or self._fletcher32,
4167
)
4168
d["filters"] = filters
4169
elif self._filters is not None:
4170
d["filters"] = self._filters
4171
4172
return d
4173
4174
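# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``create_description`` above folds the store-level compression settings and
# the ``expectedrows`` hint into the PyTables table description. A minimal
# sketch; the file name "compressed.h5" is an arbitrary assumption.
def _example_compression_and_expectedrows():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(1000, 4), columns=list("abcd"))
    # complevel/complib on the store become the default Filters for new tables
    with pd.HDFStore("compressed.h5", mode="w", complevel=9, complib="blosc") as store:
        # expectedrows is a PyTables sizing hint applied when the table is created
        store.append("df", df, expectedrows=1_000_000)
        return store.select("df")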
def read_coordinates(
4175
self, where=None, start: int | None = None, stop: int | None = None
4176
):
4177
"""
4178
select coordinates (row numbers) from a table; return the
4179
coordinates object
4180
"""
4181
# validate the version
4182
self.validate_version(where)
4183
4184
# infer the data kind
4185
if not self.infer_axes():
4186
return False
4187
4188
# create the selection
4189
selection = Selection(self, where=where, start=start, stop=stop)
4190
coords = selection.select_coords()
4191
if selection.filter is not None:
4192
for field, op, filt in selection.filter.format():
4193
data = self.read_column(
4194
field, start=coords.min(), stop=coords.max() + 1
4195
)
4196
coords = coords[op(data.iloc[coords - coords.min()], filt).values]
4197
4198
return Index(coords)
4199
4200
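# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``read_coordinates`` backs ``HDFStore.select_as_coordinates``: it returns
# the row numbers matching a query, which can then be reused for the actual
# read. A minimal sketch; "coord_store.h5" is an arbitrary file name.
def _example_select_as_coordinates():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"b": np.random.randn(100)})
    with pd.HDFStore("coord_store.h5", mode="w") as store:
        store.put("df", df, format="table", data_columns=["b"])
        coords = store.select_as_coordinates("df", "b > 0")  # Index of row numbers
        return store.select("df", where=coords)  # reuse the coordinates for the read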
def read_column(
4201
self,
4202
column: str,
4203
where=None,
4204
start: int | None = None,
4205
stop: int | None = None,
4206
):
4207
"""
4208
return a single column from the table, generally only indexables
4209
are interesting
4210
"""
4211
# validate the version
4212
self.validate_version()
4213
4214
# infer the data kind
4215
if not self.infer_axes():
4216
return False
4217
4218
if where is not None:
4219
raise TypeError("read_column does not currently accept a where clause")
4220
4221
# find the axes
4222
for a in self.axes:
4223
if column == a.name:
4224
if not a.is_data_indexable:
4225
raise ValueError(
4226
f"column [{column}] can not be extracted individually; "
4227
"it is not data indexable"
4228
)
4229
4230
# column must be an indexable or a data column
4231
c = getattr(self.table.cols, column)
4232
a.set_info(self.info)
4233
col_values = a.convert(
4234
c[start:stop],
4235
nan_rep=self.nan_rep,
4236
encoding=self.encoding,
4237
errors=self.errors,
4238
)
4239
return Series(_set_tz(col_values[1], a.tz), name=column)
4240
4241
raise KeyError(f"column [{column}] not found in the table")
4242
4243
4244
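# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``read_column`` above backs ``HDFStore.select_column``: only indexables and
# data columns can be read individually. A minimal sketch; "col_store.h5" is
# an arbitrary file name.
def _example_select_column():
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
    with pd.HDFStore("col_store.h5", mode="w") as store:
        store.put("df", df, format="table", data_columns=["a"])
        idx = store.select_column("df", "index")  # the index is always indexable
        col_a = store.select_column("df", "a")  # works: "a" is a data column
        # store.select_column("df", "b") would raise: "b" is not data indexable
        return idx, col_a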
class WORMTable(Table):
4245
"""
4246
a write-once read-many table: this format DOES NOT ALLOW appending to a
4247
table. Writing is a one-time operation; the data are stored in a format
that allows for searching the data on disk
4249
"""
4250
4251
table_type = "worm"
4252
4253
def read(
4254
self,
4255
where=None,
4256
columns=None,
4257
start: int | None = None,
4258
stop: int | None = None,
4259
):
4260
"""
4261
read the indices and the indexing array, calculate offset rows and return
4262
"""
4263
raise NotImplementedError("WORMTable needs to implement read")
4264
4265
def write(self, **kwargs):
4266
"""
4267
write in a format that we can search later on (but cannot append
4268
to): write out the indices and the values using _write_array
4269
(e.g. a CArray) create an indexing table so that we can search
4270
"""
4271
raise NotImplementedError("WORMTable needs to implement write")
4272
4273
4274
class AppendableTable(Table):
4275
"""support the new appendable table formats"""
4276
4277
table_type = "appendable"
4278
4279
def write(
4280
self,
4281
obj,
4282
axes=None,
4283
append=False,
4284
complib=None,
4285
complevel=None,
4286
fletcher32=None,
4287
min_itemsize=None,
4288
chunksize=None,
4289
expectedrows=None,
4290
dropna=False,
4291
nan_rep=None,
4292
data_columns=None,
4293
track_times=True,
4294
):
4295
if not append and self.is_exists:
4296
self._handle.remove_node(self.group, "table")
4297
4298
# create the axes
4299
table = self._create_axes(
4300
axes=axes,
4301
obj=obj,
4302
validate=append,
4303
min_itemsize=min_itemsize,
4304
nan_rep=nan_rep,
4305
data_columns=data_columns,
4306
)
4307
4308
for a in table.axes:
4309
a.validate_names()
4310
4311
if not table.is_exists:
4312
4313
# create the table
4314
options = table.create_description(
4315
complib=complib,
4316
complevel=complevel,
4317
fletcher32=fletcher32,
4318
expectedrows=expectedrows,
4319
)
4320
4321
# set the table attributes
4322
table.set_attrs()
4323
4324
options["track_times"] = track_times
4325
4326
# create the table
4327
table._handle.create_table(table.group, **options)
4328
4329
# update my info
4330
table.attrs.info = table.info
4331
4332
# validate the axes and set the kinds
4333
for a in table.axes:
4334
a.validate_and_set(table, append)
4335
4336
# add the rows
4337
table.write_data(chunksize, dropna=dropna)
4338
4339
def write_data(self, chunksize: int | None, dropna: bool = False):
4340
"""
4341
we form the data into a 2-d structure including indexes, values, and mask, and write it chunk-by-chunk
4342
"""
4343
names = self.dtype.names
4344
nrows = self.nrows_expected
4345
4346
# if dropna==True, then drop ALL nan rows
4347
masks = []
4348
if dropna:
4349
for a in self.values_axes:
4350
# figure the mask: only do if we can successfully process this
4351
# column, otherwise ignore the mask
4352
mask = isna(a.data).all(axis=0)
4353
if isinstance(mask, np.ndarray):
4354
masks.append(mask.astype("u1", copy=False))
4355
4356
# consolidate masks
4357
if len(masks):
4358
mask = masks[0]
4359
for m in masks[1:]:
4360
mask = mask & m
4361
mask = mask.ravel()
4362
else:
4363
mask = None
4364
4365
# broadcast the indexes if needed
4366
indexes = [a.cvalues for a in self.index_axes]
4367
nindexes = len(indexes)
4368
assert nindexes == 1, nindexes  # ensures we don't need to broadcast
4369
4370
# transpose the values so first dimension is last
4371
# reshape the values if needed
4372
values = [a.take_data() for a in self.values_axes]
4373
values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1)) for v in values]
4374
bvalues = []
4375
for i, v in enumerate(values):
4376
new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape
4377
bvalues.append(values[i].reshape(new_shape))
4378
4379
# write the chunks
4380
if chunksize is None:
4381
chunksize = 100000
4382
4383
rows = np.empty(min(chunksize, nrows), dtype=self.dtype)
4384
chunks = nrows // chunksize + 1
4385
for i in range(chunks):
4386
start_i = i * chunksize
4387
end_i = min((i + 1) * chunksize, nrows)
4388
if start_i >= end_i:
4389
break
4390
4391
self.write_data_chunk(
4392
rows,
4393
indexes=[a[start_i:end_i] for a in indexes],
4394
mask=mask[start_i:end_i] if mask is not None else None,
4395
values=[v[start_i:end_i] for v in bvalues],
4396
)
4397
4398
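# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``write_data`` above writes rows in chunks of ``chunksize`` and can skip
# all-NaN rows when ``dropna`` is set. A minimal sketch; "chunk_store.h5" is
# an arbitrary file name.
def _example_chunked_append():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": np.arange(1_000_000.0)})
    with pd.HDFStore("chunk_store.h5", mode="w") as store:
        # rows are appended 100_000 at a time; all-NaN rows would be dropped
        store.append("df", df, chunksize=100_000, dropna=True)
        return store.get_storer("df").nrows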
def write_data_chunk(
4399
self,
4400
rows: np.ndarray,
4401
indexes: list[np.ndarray],
4402
mask: np.ndarray | None,
4403
values: list[np.ndarray],
4404
):
4405
"""
4406
Parameters
4407
----------
4408
rows : an empty memory space where we are putting the chunk
4409
indexes : an array of the indexes
4410
mask : an array of the masks
4411
values : an array of the values
4412
"""
4413
# 0 len
4414
for v in values:
4415
if not np.prod(v.shape):
4416
return
4417
4418
nrows = indexes[0].shape[0]
4419
if nrows != len(rows):
4420
rows = np.empty(nrows, dtype=self.dtype)
4421
names = self.dtype.names
4422
nindexes = len(indexes)
4423
4424
# indexes
4425
for i, idx in enumerate(indexes):
4426
rows[names[i]] = idx
4427
4428
# values
4429
for i, v in enumerate(values):
4430
rows[names[i + nindexes]] = v
4431
4432
# mask
4433
if mask is not None:
4434
m = ~mask.ravel().astype(bool, copy=False)
4435
if not m.all():
4436
rows = rows[m]
4437
4438
if len(rows):
4439
self.table.append(rows)
4440
self.table.flush()
4441
4442
def delete(self, where=None, start: int | None = None, stop: int | None = None):
4443
4444
# delete all rows (and return the nrows)
4445
if where is None or not len(where):
4446
if start is None and stop is None:
4447
nrows = self.nrows
4448
self._handle.remove_node(self.group, recursive=True)
4449
else:
4450
# pytables<3.0 would remove a single row with stop=None
4451
if stop is None:
4452
stop = self.nrows
4453
nrows = self.table.remove_rows(start=start, stop=stop)
4454
self.table.flush()
4455
return nrows
4456
4457
# infer the data kind
4458
if not self.infer_axes():
4459
return None
4460
4461
# create the selection
4462
table = self.table
4463
selection = Selection(self, where, start=start, stop=stop)
4464
values = selection.select_coords()
4465
4466
# delete the rows in reverse order
4467
sorted_series = Series(values).sort_values()
4468
ln = len(sorted_series)
4469
4470
if ln:
4471
4472
# construct groups of consecutive rows
4473
diff = sorted_series.diff()
4474
groups = list(diff[diff > 1].index)
4475
4476
# 1 group
4477
if not len(groups):
4478
groups = [0]
4479
4480
# final element
4481
if groups[-1] != ln:
4482
groups.append(ln)
4483
4484
# initial element
4485
if groups[0] != 0:
4486
groups.insert(0, 0)
4487
4488
# we must remove in reverse order!
4489
pg = groups.pop()
4490
for g in reversed(groups):
4491
rows = sorted_series.take(range(g, pg))
4492
table.remove_rows(
4493
start=rows[rows.index[0]], stop=rows[rows.index[-1]] + 1
4494
)
4495
pg = g
4496
4497
self.table.flush()
4498
4499
# return the number of rows removed
4500
return ln
4501
4502
4503
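# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``delete`` above backs ``HDFStore.remove`` with a where clause: matching
# rows are grouped into consecutive runs and removed in reverse order.
# A minimal sketch; "rm_store.h5" is an arbitrary file name.
def _example_remove_rows():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"x": np.arange(10)})
    with pd.HDFStore("rm_store.h5", mode="w") as store:
        store.put("df", df, format="table")
        n_removed = store.remove("df", where="index > 5")  # number of rows removed
        return n_removed, store.select("df")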
class AppendableFrameTable(AppendableTable):
4504
"""support the new appendable table formats"""
4505
4506
pandas_kind = "frame_table"
4507
table_type = "appendable_frame"
4508
ndim = 2
4509
obj_type: type[DataFrame | Series] = DataFrame
4510
4511
@property
4512
def is_transposed(self) -> bool:
4513
return self.index_axes[0].axis == 1
4514
4515
@classmethod
4516
def get_object(cls, obj, transposed: bool):
4517
"""these are written transposed"""
4518
if transposed:
4519
obj = obj.T
4520
return obj
4521
4522
def read(
4523
self,
4524
where=None,
4525
columns=None,
4526
start: int | None = None,
4527
stop: int | None = None,
4528
):
4529
4530
# validate the version
4531
self.validate_version(where)
4532
4533
# infer the data kind
4534
if not self.infer_axes():
4535
return None
4536
4537
result = self._read_axes(where=where, start=start, stop=stop)
4538
4539
info = (
4540
self.info.get(self.non_index_axes[0][0], {})
4541
if len(self.non_index_axes)
4542
else {}
4543
)
4544
4545
inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]]
4546
assert len(inds) == 1
4547
ind = inds[0]
4548
4549
index = result[ind][0]
4550
4551
frames = []
4552
for i, a in enumerate(self.axes):
4553
if a not in self.values_axes:
4554
continue
4555
index_vals, cvalues = result[i]
4556
4557
# we could have a multi-index constructor here
4558
# ensure_index doesn't recognize our list-of-tuples here
4559
if info.get("type") != "MultiIndex":
4560
cols = Index(index_vals)
4561
else:
4562
cols = MultiIndex.from_tuples(index_vals)
4563
4564
names = info.get("names")
4565
if names is not None:
4566
cols.set_names(names, inplace=True)
4567
4568
if self.is_transposed:
4569
values = cvalues
4570
index_ = cols
4571
cols_ = Index(index, name=getattr(index, "name", None))
4572
else:
4573
values = cvalues.T
4574
index_ = Index(index, name=getattr(index, "name", None))
4575
cols_ = cols
4576
4577
# if we have a DataIndexableCol, its shape will only be 1 dim
4578
if values.ndim == 1 and isinstance(values, np.ndarray):
4579
values = values.reshape((1, values.shape[0]))
4580
4581
if isinstance(values, np.ndarray):
4582
df = DataFrame(values.T, columns=cols_, index=index_)
4583
elif isinstance(values, Index):
4584
df = DataFrame(values, columns=cols_, index=index_)
4585
else:
4586
# Categorical
4587
df = DataFrame._from_arrays([values], columns=cols_, index=index_)
4588
assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
4589
frames.append(df)
4590
4591
if len(frames) == 1:
4592
df = frames[0]
4593
else:
4594
df = concat(frames, axis=1)
4595
4596
selection = Selection(self, where=where, start=start, stop=stop)
4597
# apply the selection filters & axis orderings
4598
df = self.process_axes(df, selection=selection, columns=columns)
4599
4600
return df
4601
4602
4603
class AppendableSeriesTable(AppendableFrameTable):
4604
"""support the new appendable table formats"""
4605
4606
pandas_kind = "series_table"
4607
table_type = "appendable_series"
4608
ndim = 2
4609
obj_type = Series
4610
4611
@property
4612
def is_transposed(self) -> bool:
4613
return False
4614
4615
@classmethod
4616
def get_object(cls, obj, transposed: bool):
4617
return obj
4618
4619
def write(self, obj, data_columns=None, **kwargs):
4620
"""we are going to write this as a frame table"""
4621
if not isinstance(obj, DataFrame):
4622
name = obj.name or "values"
4623
obj = obj.to_frame(name)
4624
return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs)
4625
4626
def read(
4627
self,
4628
where=None,
4629
columns=None,
4630
start: int | None = None,
4631
stop: int | None = None,
4632
) -> Series:
4633
4634
is_multi_index = self.is_multi_index
4635
if columns is not None and is_multi_index:
4636
assert isinstance(self.levels, list) # needed for mypy
4637
for n in self.levels:
4638
if n not in columns:
4639
columns.insert(0, n)
4640
s = super().read(where=where, columns=columns, start=start, stop=stop)
4641
if is_multi_index:
4642
s.set_index(self.levels, inplace=True)
4643
4644
s = s.iloc[:, 0]
4645
4646
# remove the default name
4647
if s.name == "values":
4648
s.name = None
4649
return s
4650
4651
4652
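# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``AppendableSeriesTable`` above stores a Series as a one-column frame named
# "values" and strips that default name on read. A minimal sketch;
# "series_store.h5" is an arbitrary file name.
def _example_series_table():
    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0])  # unnamed, stored under the column "values"
    s.to_hdf("series_store.h5", key="s", format="table")
    back = pd.read_hdf("series_store.h5", "s")
    assert back.name is None  # the default "values" name is removed on read
    return back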
class AppendableMultiSeriesTable(AppendableSeriesTable):
4653
"""support the new appendable table formats"""
4654
4655
pandas_kind = "series_table"
4656
table_type = "appendable_multiseries"
4657
4658
def write(self, obj, **kwargs):
4659
"""we are going to write this as a frame table"""
4660
name = obj.name or "values"
4661
newobj, self.levels = self.validate_multiindex(obj)
4662
assert isinstance(self.levels, list) # for mypy
4663
cols = list(self.levels)
4664
cols.append(name)
4665
newobj.columns = Index(cols)
4666
return super().write(obj=newobj, **kwargs)
4667
4668
4669
class GenericTable(AppendableFrameTable):
4670
"""a table that read/writes the generic pytables table format"""
4671
4672
pandas_kind = "frame_table"
4673
table_type = "generic_table"
4674
ndim = 2
4675
obj_type = DataFrame
4676
levels: list[Hashable]
4677
4678
@property
4679
def pandas_type(self) -> str:
4680
return self.pandas_kind
4681
4682
@property
4683
def storable(self):
4684
return getattr(self.group, "table", None) or self.group
4685
4686
def get_attrs(self):
4687
"""retrieve our attributes"""
4688
self.non_index_axes = []
4689
self.nan_rep = None
4690
self.levels = []
4691
4692
self.index_axes = [a for a in self.indexables if a.is_an_indexable]
4693
self.values_axes = [a for a in self.indexables if not a.is_an_indexable]
4694
self.data_columns = [a.name for a in self.values_axes]
4695
4696
@cache_readonly
4697
def indexables(self):
4698
"""create the indexables from the table description"""
4699
d = self.description
4700
4701
# TODO: can we get a typ for this? AFAICT it is the only place
4702
# where we aren't passing one
4703
# the index columns is just a simple index
4704
md = self.read_metadata("index")
4705
meta = "category" if md is not None else None
4706
index_col = GenericIndexCol(
4707
name="index", axis=0, table=self.table, meta=meta, metadata=md
4708
)
4709
4710
_indexables: list[GenericIndexCol | GenericDataIndexableCol] = [index_col]
4711
4712
for i, n in enumerate(d._v_names):
4713
assert isinstance(n, str)
4714
4715
atom = getattr(d, n)
4716
md = self.read_metadata(n)
4717
meta = "category" if md is not None else None
4718
dc = GenericDataIndexableCol(
4719
name=n,
4720
pos=i,
4721
values=[n],
4722
typ=atom,
4723
table=self.table,
4724
meta=meta,
4725
metadata=md,
4726
)
4727
_indexables.append(dc)
4728
4729
return _indexables
4730
4731
def write(self, **kwargs):
4732
raise NotImplementedError("cannot write on a generic table")
4733
4734
4735
class AppendableMultiFrameTable(AppendableFrameTable):
4736
"""a frame with a multi-index"""
4737
4738
table_type = "appendable_multiframe"
4739
obj_type = DataFrame
4740
ndim = 2
4741
_re_levels = re.compile(r"^level_\d+$")
4742
4743
@property
4744
def table_type_short(self) -> str:
4745
return "appendable_multi"
4746
4747
def write(self, obj, data_columns=None, **kwargs):
4748
if data_columns is None:
4749
data_columns = []
4750
elif data_columns is True:
4751
data_columns = obj.columns.tolist()
4752
obj, self.levels = self.validate_multiindex(obj)
4753
assert isinstance(self.levels, list) # for mypy
4754
for n in self.levels:
4755
if n not in data_columns:
4756
data_columns.insert(0, n)
4757
return super().write(obj=obj, data_columns=data_columns, **kwargs)
4758
4759
def read(
4760
self,
4761
where=None,
4762
columns=None,
4763
start: int | None = None,
4764
stop: int | None = None,
4765
):
4766
4767
df = super().read(where=where, columns=columns, start=start, stop=stop)
4768
df = df.set_index(self.levels)
4769
4770
# remove names for 'level_%d'
4771
df.index = df.index.set_names(
4772
[None if self._re_levels.search(name) else name for name in df.index.names]
4773
)
4774
4775
return df
4776
4777
4778
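# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``AppendableMultiFrameTable`` above writes the MultiIndex levels as ordinary
# data columns and restores them as the index on read. A minimal sketch;
# "mi_store.h5" is an arbitrary file name.
def _example_multiindex_table():
    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["key", "num"])
    df = pd.DataFrame({"val": np.arange(4.0)}, index=idx)
    df.to_hdf("mi_store.h5", key="mi", format="table")
    back = pd.read_hdf("mi_store.h5", "mi")
    assert isinstance(back.index, pd.MultiIndex)
    return back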
def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataFrame:
4779
ax = obj._get_axis(axis)
4780
labels = ensure_index(labels)
4781
4782
# try not to reindex even if other is provided
4783
# if it equals our current index
4784
if other is not None:
4785
other = ensure_index(other)
4786
if (other is None or labels.equals(other)) and labels.equals(ax):
4787
return obj
4788
4789
labels = ensure_index(labels.unique())
4790
if other is not None:
4791
labels = ensure_index(other.unique()).intersection(labels, sort=False)
4792
if not labels.equals(ax):
4793
slicer: list[slice | Index] = [slice(None, None)] * obj.ndim
4794
slicer[axis] = labels
4795
obj = obj.loc[tuple(slicer)]
4796
return obj
4797
4798
4799
# tz to/from coercion
4800
4801
4802
def _get_tz(tz: tzinfo) -> str | tzinfo:
4803
"""for a tz-aware type, return an encoded zone"""
4804
zone = timezones.get_timezone(tz)
4805
return zone
4806
4807
4808
def _set_tz(
4809
values: np.ndarray | Index,
4810
tz: str | tzinfo | None,
4811
coerce: bool = False,
4812
) -> np.ndarray | DatetimeIndex:
4813
"""
4814
coerce the values to a DatetimeIndex if tz is set
4815
preserve the input shape if possible
4816
4817
Parameters
4818
----------
4819
values : ndarray or Index
4820
tz : str or tzinfo
4821
coerce : if we do not have a passed timezone, coerce to M8[ns] ndarray
4822
"""
4823
if isinstance(values, DatetimeIndex):
4824
# If values is tzaware, the tz gets dropped in the values.ravel()
4825
# call below (which returns an ndarray). So we are only non-lossy
4826
# if `tz` matches `values.tz`.
4827
assert values.tz is None or values.tz == tz
4828
4829
if tz is not None:
4830
if isinstance(values, DatetimeIndex):
4831
name = values.name
4832
values = values.asi8
4833
else:
4834
name = None
4835
values = values.ravel()
4836
4837
tz = _ensure_decoded(tz)
4838
values = DatetimeIndex(values, name=name)
4839
values = values.tz_localize("UTC").tz_convert(tz)
4840
elif coerce:
4841
values = np.asarray(values, dtype="M8[ns]")
4842
4843
# error: Incompatible return value type (got "Union[ndarray, Index]",
4844
# expected "Union[ndarray, DatetimeIndex]")
4845
return values # type: ignore[return-value]
4846
4847
4848
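# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``_get_tz`` / ``_set_tz`` above store tz-aware datetimes as UTC integers plus
# an encoded zone, and re-localize them on read. A minimal sketch;
# "tz_store.h5" is an arbitrary file name.
def _example_timezone_roundtrip():
    import pandas as pd

    df = pd.DataFrame(
        {"ts": pd.date_range("2021-01-01", periods=3, tz="US/Eastern"), "v": [1, 2, 3]}
    )
    df.to_hdf("tz_store.h5", key="df", format="table")
    back = pd.read_hdf("tz_store.h5", "df")
    assert str(back["ts"].dtype) == "datetime64[ns, US/Eastern]"
    return back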
def _convert_index(name: str, index: Index, encoding: str, errors: str) -> IndexCol:
4849
assert isinstance(name, str)
4850
4851
index_name = index.name
4852
# error: Argument 1 to "_get_data_and_dtype_name" has incompatible type "Index";
4853
# expected "Union[ExtensionArray, ndarray]"
4854
converted, dtype_name = _get_data_and_dtype_name(index) # type: ignore[arg-type]
4855
kind = _dtype_to_kind(dtype_name)
4856
atom = DataIndexableCol._get_atom(converted)
4857
4858
if isinstance(index, Int64Index) or needs_i8_conversion(index.dtype):
4859
# Includes Int64Index, RangeIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex,
4860
# in which case "kind" is "integer", "integer", "datetime64",
4861
# "timedelta64", and "integer", respectively.
4862
return IndexCol(
4863
name,
4864
values=converted,
4865
kind=kind,
4866
typ=atom,
4867
freq=getattr(index, "freq", None),
4868
tz=getattr(index, "tz", None),
4869
index_name=index_name,
4870
)
4871
4872
if isinstance(index, MultiIndex):
4873
raise TypeError("MultiIndex not supported here!")
4874
4875
inferred_type = lib.infer_dtype(index, skipna=False)
4876
# we won't get inferred_type of "datetime64" or "timedelta64" as these
4877
# would go through the DatetimeIndex/TimedeltaIndex paths above
4878
4879
values = np.asarray(index)
4880
4881
if inferred_type == "date":
4882
converted = np.asarray([v.toordinal() for v in values], dtype=np.int32)
4883
return IndexCol(
4884
name, converted, "date", _tables().Time32Col(), index_name=index_name
4885
)
4886
elif inferred_type == "string":
4887
4888
converted = _convert_string_array(values, encoding, errors)
4889
itemsize = converted.dtype.itemsize
4890
return IndexCol(
4891
name,
4892
converted,
4893
"string",
4894
_tables().StringCol(itemsize),
4895
index_name=index_name,
4896
)
4897
4898
elif inferred_type in ["integer", "floating"]:
4899
return IndexCol(
4900
name, values=converted, kind=kind, typ=atom, index_name=index_name
4901
)
4902
else:
4903
assert isinstance(converted, np.ndarray) and converted.dtype == object
4904
assert kind == "object", kind
4905
atom = _tables().ObjectAtom()
4906
return IndexCol(name, converted, kind, atom, index_name=index_name)
4907
4908
4909
def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index:
4910
index: Index | np.ndarray
4911
4912
if kind == "datetime64":
4913
index = DatetimeIndex(data)
4914
elif kind == "timedelta64":
4915
index = TimedeltaIndex(data)
4916
elif kind == "date":
4917
try:
4918
index = np.asarray([date.fromordinal(v) for v in data], dtype=object)
4919
except ValueError:
4920
index = np.asarray([date.fromtimestamp(v) for v in data], dtype=object)
4921
elif kind in ("integer", "float"):
4922
index = np.asarray(data)
4923
elif kind == "string":
4924
index = _unconvert_string_array(
4925
data, nan_rep=None, encoding=encoding, errors=errors
4926
)
4927
elif kind == "object":
4928
index = np.asarray(data[0])
4929
else: # pragma: no cover
4930
raise ValueError(f"unrecognized index type {kind}")
4931
return index
4932
4933
4934
def _maybe_convert_for_string_atom(
4935
name: str,
4936
bvalues: ArrayLike,
4937
existing_col,
4938
min_itemsize,
4939
nan_rep,
4940
encoding,
4941
errors,
4942
columns: list[str],
4943
):
4944
4945
if bvalues.dtype != object:
4946
return bvalues
4947
4948
bvalues = cast(np.ndarray, bvalues)
4949
4950
dtype_name = bvalues.dtype.name
4951
inferred_type = lib.infer_dtype(bvalues, skipna=False)
4952
4953
if inferred_type == "date":
4954
raise TypeError("[date] is not implemented as a table column")
4955
elif inferred_type == "datetime":
4956
# after GH#8260
4957
# this only would be hit for a multi-timezone dtype which is an error
4958
raise TypeError(
4959
"too many timezones in this block, create separate data columns"
4960
)
4961
4962
elif not (inferred_type == "string" or dtype_name == "object"):
4963
return bvalues
4964
4965
mask = isna(bvalues)
4966
data = bvalues.copy()
4967
data[mask] = nan_rep
4968
4969
# see if we have a valid string type
4970
inferred_type = lib.infer_dtype(data, skipna=False)
4971
if inferred_type != "string":
4972
4973
# we cannot serialize this data, so report an exception on a column
4974
# by column basis
4975
4976
# expected behaviour:
4977
# search block for a non-string object column by column
4978
for i in range(data.shape[0]):
4979
col = data[i]
4980
inferred_type = lib.infer_dtype(col, skipna=False)
4981
if inferred_type != "string":
4982
error_column_label = columns[i] if len(columns) > i else f"No.{i}"
4983
raise TypeError(
4984
f"Cannot serialize the column [{error_column_label}]\n"
4985
f"because its data contents are not [string] but "
4986
f"[{inferred_type}] object dtype"
4987
)
4988
4989
# itemsize is the maximum length of a string (along any dimension)
4990
4991
data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape)
4992
itemsize = data_converted.itemsize
4993
4994
# specified min_itemsize?
4995
if isinstance(min_itemsize, dict):
4996
min_itemsize = int(min_itemsize.get(name) or min_itemsize.get("values") or 0)
4997
itemsize = max(min_itemsize or 0, itemsize)
4998
4999
# check for column in the values conflicts
5000
if existing_col is not None:
5001
eci = existing_col.validate_col(itemsize)
5002
if eci is not None and eci > itemsize:
5003
itemsize = eci
5004
5005
data_converted = data_converted.astype(f"|S{itemsize}", copy=False)
5006
return data_converted
5007
5008
5009
def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.ndarray:
5010
"""
5011
Take a string-like that is object dtype and coerce to a fixed size string type.
5012
5013
Parameters
5014
----------
5015
data : np.ndarray[object]
5016
encoding : str
5017
errors : str
5018
Handler for encoding errors.
5019
5020
Returns
5021
-------
5022
np.ndarray[fixed-length-string]
5023
"""
5024
# encode if needed
5025
if len(data):
5026
data = (
5027
Series(data.ravel())
5028
.str.encode(encoding, errors)
5029
._values.reshape(data.shape)
5030
)
5031
5032
# create the sized dtype
5033
ensured = ensure_object(data.ravel())
5034
itemsize = max(1, libwriters.max_len_string_array(ensured))
5035
5036
data = np.asarray(data, dtype=f"S{itemsize}")
5037
return data
5038
5039
5040
def _unconvert_string_array(
5041
data: np.ndarray, nan_rep, encoding: str, errors: str
5042
) -> np.ndarray:
5043
"""
5044
Inverse of _convert_string_array.
5045
5046
Parameters
5047
----------
5048
data : np.ndarray[fixed-length-string]
5049
nan_rep : the storage repr of NaN
5050
encoding : str
5051
errors : str
5052
Handler for encoding errors.
5053
5054
Returns
5055
-------
5056
np.ndarray[object]
5057
Decoded data.
5058
"""
5059
shape = data.shape
5060
data = np.asarray(data.ravel(), dtype=object)
5061
5062
if len(data):
5063
5064
itemsize = libwriters.max_len_string_array(ensure_object(data))
5065
dtype = f"U{itemsize}"
5066
5067
if isinstance(data[0], bytes):
5068
data = Series(data).str.decode(encoding, errors=errors)._values
5069
else:
5070
data = data.astype(dtype, copy=False).astype(object, copy=False)
5071
5072
if nan_rep is None:
5073
nan_rep = "nan"
5074
5075
libwriters.string_array_replace_from_nan_rep(data, nan_rep)
5076
return data.reshape(shape)
5077
5078
5079
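# --- Added commentary: illustrative round trip, not part of pandas -------
# ``_convert_string_array`` / ``_unconvert_string_array`` above are private
# helpers (subject to change); this sketch only shows the fixed-width encode /
# decode round trip they implement.
def _example_string_array_roundtrip():
    import numpy as np

    arr = np.array(["foo", "longer string"], dtype=object)
    packed = _convert_string_array(arr, encoding="UTF-8", errors="strict")
    assert packed.dtype == np.dtype("S13")  # sized to the longest encoded value
    unpacked = _unconvert_string_array(
        packed, nan_rep=None, encoding="UTF-8", errors="strict"
    )
    assert list(unpacked) == ["foo", "longer string"]
    return unpacked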
def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str):
5080
assert isinstance(val_kind, str), type(val_kind)
5081
if _need_convert(val_kind):
5082
conv = _get_converter(val_kind, encoding, errors)
5083
values = conv(values)
5084
return values
5085
5086
5087
def _get_converter(kind: str, encoding: str, errors: str):
5088
if kind == "datetime64":
5089
return lambda x: np.asarray(x, dtype="M8[ns]")
5090
elif kind == "string":
5091
return lambda x: _unconvert_string_array(
5092
x, nan_rep=None, encoding=encoding, errors=errors
5093
)
5094
else: # pragma: no cover
5095
raise ValueError(f"invalid kind {kind}")
5096
5097
5098
def _need_convert(kind: str) -> bool:
5099
if kind in ("datetime64", "string"):
5100
return True
5101
return False
5102
5103
5104
def _maybe_adjust_name(name: str, version: Sequence[int]) -> str:
5105
"""
5106
Prior to 0.10.1, values blocks were named like values_0 rather than
values_block_0; adjust the given name if necessary.
5108
5109
Parameters
5110
----------
5111
name : str
5112
version : Tuple[int, int, int]
5113
5114
Returns
5115
-------
5116
str
5117
"""
5118
if isinstance(version, str) or len(version) < 3:
5119
raise ValueError("Version is incorrect, expected sequence of 3 integers.")
5120
5121
if version[0] == 0 and version[1] <= 10 and version[2] == 0:
5122
m = re.search(r"values_block_(\d+)", name)
5123
if m:
5124
grp = m.groups()[0]
5125
name = f"values_{grp}"
5126
return name
5127
5128
5129
def _dtype_to_kind(dtype_str: str) -> str:
5130
"""
5131
Find the "kind" string describing the given dtype name.
5132
"""
5133
dtype_str = _ensure_decoded(dtype_str)
5134
5135
if dtype_str.startswith("string") or dtype_str.startswith("bytes"):
5136
kind = "string"
5137
elif dtype_str.startswith("float"):
5138
kind = "float"
5139
elif dtype_str.startswith("complex"):
5140
kind = "complex"
5141
elif dtype_str.startswith("int") or dtype_str.startswith("uint"):
5142
kind = "integer"
5143
elif dtype_str.startswith("datetime64"):
5144
kind = "datetime64"
5145
elif dtype_str.startswith("timedelta"):
5146
kind = "timedelta64"
5147
elif dtype_str.startswith("bool"):
5148
kind = "bool"
5149
elif dtype_str.startswith("category"):
5150
kind = "category"
5151
elif dtype_str.startswith("period"):
5152
# We store the `freq` attr so we can restore from integers
5153
kind = "integer"
5154
elif dtype_str == "object":
5155
kind = "object"
5156
else:
5157
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
5158
5159
return kind
5160
5161
5162
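# --- Added commentary: illustrative mapping, not part of pandas ----------
# A few sample results of ``_dtype_to_kind`` above, as a quick sanity check.
def _example_dtype_to_kind():
    assert _dtype_to_kind("int64") == "integer"
    assert _dtype_to_kind("float64") == "float"
    assert _dtype_to_kind("datetime64[ns]") == "datetime64"
    assert _dtype_to_kind("object") == "object"
    assert _dtype_to_kind("category") == "category"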
def _get_data_and_dtype_name(data: ArrayLike):
5163
"""
5164
Convert the passed data into a storable form and a dtype string.
5165
"""
5166
if isinstance(data, Categorical):
5167
data = data.codes
5168
5169
# For datetime64tz we need to drop the TZ in tests TODO: why?
5170
dtype_name = data.dtype.name.split("[")[0]
5171
5172
if data.dtype.kind in ["m", "M"]:
5173
data = np.asarray(data.view("i8"))
5174
# TODO: we used to reshape for the dt64tz case, but no longer
5175
# doing that doesn't seem to break anything. why?
5176
5177
elif isinstance(data, PeriodIndex):
5178
data = data.asi8
5179
5180
data = np.asarray(data)
5181
return data, dtype_name
5182
5183
5184
class Selection:
5185
"""
5186
Carries out a selection operation on a tables.Table object.
5187
5188
Parameters
5189
----------
5190
table : a Table object
5191
where : list of Terms (or convertible to)
5192
start, stop: indices to start and/or stop selection
5193
5194
"""
5195
5196
def __init__(
5197
self,
5198
table: Table,
5199
where=None,
5200
start: int | None = None,
5201
stop: int | None = None,
5202
):
5203
self.table = table
5204
self.where = where
5205
self.start = start
5206
self.stop = stop
5207
self.condition = None
5208
self.filter = None
5209
self.terms = None
5210
self.coordinates = None
5211
5212
if is_list_like(where):
5213
5214
# see if we have a passed coordinate like
5215
with suppress(ValueError):
5216
inferred = lib.infer_dtype(where, skipna=False)
5217
if inferred == "integer" or inferred == "boolean":
5218
where = np.asarray(where)
5219
if where.dtype == np.bool_:
5220
start, stop = self.start, self.stop
5221
if start is None:
5222
start = 0
5223
if stop is None:
5224
stop = self.table.nrows
5225
self.coordinates = np.arange(start, stop)[where]
5226
elif issubclass(where.dtype.type, np.integer):
5227
if (self.start is not None and (where < self.start).any()) or (
5228
self.stop is not None and (where >= self.stop).any()
5229
):
5230
raise ValueError(
5231
"where must have index locations >= start and < stop"
5232
)
5233
self.coordinates = where
5234
5235
if self.coordinates is None:
5236
5237
self.terms = self.generate(where)
5238
5239
# create the numexpr & the filter
5240
if self.terms is not None:
5241
self.condition, self.filter = self.terms.evaluate()
5242
5243
def generate(self, where):
5244
"""where can be a : dict,list,tuple,string"""
5245
if where is None:
5246
return None
5247
5248
q = self.table.queryables()
5249
try:
5250
return PyTablesExpr(where, queryables=q, encoding=self.table.encoding)
5251
except NameError as err:
5252
# raise a nice message, suggesting that the user should use
5253
# data_columns
5254
qkeys = ",".join(q.keys())
5255
msg = dedent(
5256
f"""\
5257
The passed where expression: {where}
5258
contains an invalid variable reference
5259
all of the variable references must be a reference to
5260
an axis (e.g. 'index' or 'columns'), or a data_column
5261
The currently defined references are: {qkeys}
5262
"""
5263
)
5264
raise ValueError(msg) from err
5265
5266
def select(self):
5267
"""
5268
generate the selection
5269
"""
5270
if self.condition is not None:
5271
return self.table.table.read_where(
5272
self.condition.format(), start=self.start, stop=self.stop
5273
)
5274
elif self.coordinates is not None:
5275
return self.table.table.read_coordinates(self.coordinates)
5276
return self.table.table.read(start=self.start, stop=self.stop)
5277
5278
def select_coords(self):
5279
"""
5280
generate the selection
5281
"""
5282
start, stop = self.start, self.stop
5283
nrows = self.table.nrows
5284
if start is None:
5285
start = 0
5286
elif start < 0:
5287
start += nrows
5288
if stop is None:
5289
stop = nrows
5290
elif stop < 0:
5291
stop += nrows
5292
5293
if self.condition is not None:
5294
return self.table.table.get_where_list(
5295
self.condition.format(), start=start, stop=stop, sort=True
5296
)
5297
elif self.coordinates is not None:
5298
return self.coordinates
5299
5300
return np.arange(start, stop)
5301
5302
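# --- Added commentary: illustrative usage sketch, not part of pandas -----
# ``Selection`` above turns user-supplied ``where`` values into either a
# numexpr condition, a filter, or explicit coordinates. A minimal sketch of
# the query forms it supports; "sel_store.h5" is an arbitrary file name.
def _example_where_queries():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": np.arange(10.0), "b": np.arange(10.0) * 2})
    with pd.HDFStore("sel_store.h5", mode="w") as store:
        store.put("df", df, format="table", data_columns=True)
        r1 = store.select("df", where="index >= 3 & a < 8")  # index + data column terms
        r2 = store.select("df", where="columns = ['a']")  # column selection becomes a filter
        r3 = store.select("df", start=2, stop=5)  # positional start/stop slice
        return r1, r2, r3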