# Path: blob/master/invest-robot-contest_TinkoffBotTwitch-main/venv/lib/python3.8/site-packages/pandas/io/sql.py
# 7813 views (code-hosting page metadata, not part of the source)
"""1Collection of query wrappers / abstractions to both facilitate data2retrieval and to reduce dependency on DB-specific API.3"""45from __future__ import annotations67from contextlib import contextmanager8from datetime import (9date,10datetime,11time,12)13from functools import partial14import re15from typing import (16Any,17Iterator,18Sequence,19cast,20overload,21)22import warnings2324import numpy as np2526import pandas._libs.lib as lib27from pandas._typing import DtypeArg28from pandas.compat._optional import import_optional_dependency29from pandas.errors import AbstractMethodError30from pandas.util._exceptions import find_stack_level3132from pandas.core.dtypes.common import (33is_datetime64tz_dtype,34is_dict_like,35is_list_like,36)37from pandas.core.dtypes.dtypes import DatetimeTZDtype38from pandas.core.dtypes.missing import isna3940from pandas import get_option41from pandas.core.api import (42DataFrame,43Series,44)45from pandas.core.base import PandasObject46import pandas.core.common as com47from pandas.core.tools.datetimes import to_datetime48from pandas.util.version import Version495051class DatabaseError(OSError):52pass535455# -----------------------------------------------------------------------------56# -- Helper functions575859def _gt14() -> bool:60"""61Check if sqlalchemy.__version__ is at least 1.4.0, when several62deprecations were made.63"""64import sqlalchemy6566return Version(sqlalchemy.__version__) >= Version("1.4.0")676869def _convert_params(sql, params):70"""Convert SQL and params args to DBAPI2.0 compliant format."""71args = [sql]72if params is not None:73if hasattr(params, "keys"): # test if params is a mapping74args += [params]75else:76args += [list(params)]77return args787980def _process_parse_dates_argument(parse_dates):81"""Process parse_dates argument for read_sql functions"""82# handle non-list entries for parse_dates gracefully83if parse_dates is True or parse_dates is None or parse_dates is False:84parse_dates = []8586elif not 
hasattr(parse_dates, "__iter__"):87parse_dates = [parse_dates]88return parse_dates899091def _handle_date_column(92col, utc: bool | None = None, format: str | dict[str, Any] | None = None93):94if isinstance(format, dict):95# GH35185 Allow custom error values in parse_dates argument of96# read_sql like functions.97# Format can take on custom to_datetime argument values such as98# {"errors": "coerce"} or {"dayfirst": True}99error = format.pop("errors", None) or "ignore"100return to_datetime(col, errors=error, **format)101else:102# Allow passing of formatting string for integers103# GH17855104if format is None and (105issubclass(col.dtype.type, np.floating)106or issubclass(col.dtype.type, np.integer)107):108format = "s"109if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]:110return to_datetime(col, errors="coerce", unit=format, utc=utc)111elif is_datetime64tz_dtype(col.dtype):112# coerce to UTC timezone113# GH11216114return to_datetime(col, utc=True)115else:116return to_datetime(col, errors="coerce", format=format, utc=utc)117118119def _parse_date_columns(data_frame, parse_dates):120"""121Force non-datetime columns to be read as such.122Supports both string formatted and integer timestamp columns.123"""124parse_dates = _process_parse_dates_argument(parse_dates)125126# we want to coerce datetime64_tz dtypes for now to UTC127# we could in theory do a 'nice' conversion from a FixedOffset tz128# GH11216129for col_name, df_col in data_frame.items():130if is_datetime64tz_dtype(df_col.dtype) or col_name in parse_dates:131try:132fmt = parse_dates[col_name]133except TypeError:134fmt = None135data_frame[col_name] = _handle_date_column(df_col, format=fmt)136137return data_frame138139140def _wrap_result(141data,142columns,143index_col=None,144coerce_float: bool = True,145parse_dates=None,146dtype: DtypeArg | None = None,147):148"""Wrap result set of query in a DataFrame."""149frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float)150151if 
def execute(sql, con, params=None):
    """
    Execute the given SQL query using the provided connection object.

    Parameters
    ----------
    sql : string
        SQL query to be executed.
    con : SQLAlchemy connectable(engine/connection) or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by the
        library.
        If a DBAPI2 object, only sqlite3 is supported.
    params : list or tuple, optional, default: None
        List of parameters to pass to execute method.

    Returns
    -------
    Results Iterable
    """
    pandas_sql = pandasSQL_builder(con)
    args = _convert_params(sql, params)
    return pandas_sql.execute(*args)


# -----------------------------------------------------------------------------
# -- Read and write to DataFrames


@overload
def read_sql_table(
    table_name,
    con,
    schema=...,
    index_col=...,
    coerce_float=...,
    parse_dates=...,
    columns=...,
    chunksize: None = ...,
) -> DataFrame:
    ...


@overload
def read_sql_table(
    table_name,
    con,
    schema=...,
    index_col=...,
    coerce_float=...,
    parse_dates=...,
    columns=...,
    chunksize: int = ...,
) -> Iterator[DataFrame]:
    ...


def read_sql_table(
    table_name: str,
    con,
    schema: str | None = None,
    index_col: str | Sequence[str] | None = None,
    coerce_float: bool = True,
    parse_dates=None,
    columns=None,
    chunksize: int | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL database table into a DataFrame.

    Given a table name and a SQLAlchemy connectable, returns a DataFrame.
    This function does not support DBAPI connections.

    Parameters
    ----------
    table_name : str
        Name of SQL table in database.
    con : SQLAlchemy connectable or str
        A database URI could be provided as str.
        SQLite DBAPI connection mode not supported.
    schema : str, default None
        Name of SQL schema in database to query (if database flavor
        supports this). Uses default schema if None (default).
    index_col : str or list of str, optional, default: None
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point. Can result in loss of Precision.
    parse_dates : list or dict, default None
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
    columns : list, default None
        List of column names to select from SQL table.
    chunksize : int, default None
        If specified, returns an iterator where `chunksize` is the number of
        rows to include in each chunk.

    Returns
    -------
    DataFrame or Iterator[DataFrame]
        A SQL table is returned as two-dimensional data structure with labeled
        axes.

    See Also
    --------
    read_sql_query : Read SQL query into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.

    Notes
    -----
    Any datetime values with time zone information will be converted to UTC.

    Examples
    --------
    >>> pd.read_sql_table('table_name', 'postgres:///db_name')  # doctest:+SKIP
    """
    pandas_sql = pandasSQL_builder(con, schema=schema)
    if not pandas_sql.has_table(table_name):
        raise ValueError(f"Table {table_name} not found")

    table = pandas_sql.read_table(
        table_name,
        index_col=index_col,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        columns=columns,
        chunksize=chunksize,
    )

    if table is not None:
        return table
    else:
        raise ValueError(f"Table {table_name} not found", con)
@overload
def read_sql_query(
    sql,
    con,
    index_col=...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    chunksize: None = ...,
    dtype: DtypeArg | None = ...,
) -> DataFrame:
    ...


@overload
def read_sql_query(
    sql,
    con,
    index_col=...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    chunksize: int = ...,
    dtype: DtypeArg | None = ...,
) -> Iterator[DataFrame]:
    ...


def read_sql_query(
    sql,
    con,
    index_col=None,
    coerce_float: bool = True,
    params=None,
    parse_dates=None,
    chunksize: int | None = None,
    dtype: DtypeArg | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL query into a DataFrame.

    Returns a DataFrame corresponding to the result set of the query
    string. Optionally provide an `index_col` parameter to use one of the
    columns as the index, otherwise default integer index will be used.

    Parameters
    ----------
    sql : str SQL query or SQLAlchemy Selectable (select or text object)
        SQL query to be executed.
    con : SQLAlchemy connectable, str, or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported.
    index_col : str or list of str, optional, default: None
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point. Useful for SQL result sets.
    params : list, tuple or dict, optional, default: None
        List of parameters to pass to execute method. The syntax used
        to pass parameters is database driver dependent. Check your
        database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
    parse_dates : list or dict, default: None
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the number of
        rows to include in each chunk.
    dtype : Type name or dict of columns
        Data type for data or columns. E.g. np.float64 or
        {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.

        .. versionadded:: 1.3.0

    Returns
    -------
    DataFrame or Iterator[DataFrame]

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.

    Notes
    -----
    Any datetime values with time zone information parsed via the `parse_dates`
    parameter will be converted to UTC.
    """
    pandas_sql = pandasSQL_builder(con)
    return pandas_sql.read_query(
        sql,
        index_col=index_col,
        params=params,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        chunksize=chunksize,
        dtype=dtype,
    )
@overload
def read_sql(
    sql,
    con,
    index_col=...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    columns=...,
    chunksize: None = ...,
) -> DataFrame:
    ...


@overload
def read_sql(
    sql,
    con,
    index_col=...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    columns=...,
    chunksize: int = ...,
) -> Iterator[DataFrame]:
    ...


def read_sql(
    sql,
    con,
    index_col: str | Sequence[str] | None = None,
    coerce_float: bool = True,
    params=None,
    parse_dates=None,
    columns=None,
    chunksize: int | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL query or database table into a DataFrame.

    This function is a convenience wrapper around ``read_sql_table`` and
    ``read_sql_query`` (for backward compatibility). It will delegate
    to the specific function depending on the provided input. A SQL query
    will be routed to ``read_sql_query``, while a database table name will
    be routed to ``read_sql_table``. Note that the delegated function might
    have more specific notes about their functionality not listed here.

    Parameters
    ----------
    sql : str or SQLAlchemy Selectable (select or text object)
        SQL query to be executed or a table name.
    con : SQLAlchemy connectable, str, or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible
        for engine disposal and connection closure for the SQLAlchemy connectable; str
        connections are closed automatically. See
        `here <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.
    index_col : str or list of str, optional, default: None
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point, useful for SQL result sets.
    params : list, tuple or dict, optional, default: None
        List of parameters to pass to execute method. The syntax used
        to pass parameters is database driver dependent. Check your
        database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
    parse_dates : list or dict, default: None
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
    columns : list, default: None
        List of column names to select from SQL table (only used when reading
        a table).
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the
        number of rows to include in each chunk.

    Returns
    -------
    DataFrame or Iterator[DataFrame]

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql_query : Read SQL query into a DataFrame.

    Examples
    --------
    Read data from SQL via either a SQL query or a SQL tablename.
    When using a SQLite database only SQL queries are accepted,
    providing only the SQL tablename will result in an error.

    >>> from sqlite3 import connect
    >>> conn = connect(':memory:')
    >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']],
    ...                   columns=['int_column', 'date_column'])
    >>> df.to_sql('test_data', conn)
    2

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn)
       int_column date_column
    0           0    10/11/12
    1           1    12/11/10

    >>> pd.read_sql('test_data', 'postgres:///db_name')  # doctest:+SKIP

    Apply date parsing to columns through the ``parse_dates`` argument

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn,
    ...             parse_dates=["date_column"])
       int_column date_column
    0           0  2012-10-11
    1           1  2010-12-11

    The ``parse_dates`` argument calls ``pd.to_datetime`` on the provided columns.
    Custom argument values for applying ``pd.to_datetime`` on a column are specified
    via a dictionary format:
    1. Ignore errors while parsing the values of "date_column"

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn,
    ...             parse_dates={"date_column": {"errors": "ignore"}})
       int_column date_column
    0           0  2012-10-11
    1           1  2010-12-11

    2. Apply a dayfirst date parsing order on the values of "date_column"

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn,
    ...             parse_dates={"date_column": {"dayfirst": True}})
       int_column date_column
    0           0  2012-11-10
    1           1  2010-11-12

    3. Apply custom formatting when date parsing the values of "date_column"

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn,
    ...             parse_dates={"date_column": {"format": "%d/%m/%y"}})
       int_column date_column
    0           0  2012-11-10
    1           1  2010-11-12
    """
    pandas_sql = pandasSQL_builder(con)

    if isinstance(pandas_sql, SQLiteDatabase):
        return pandas_sql.read_query(
            sql,
            index_col=index_col,
            params=params,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            chunksize=chunksize,
        )

    try:
        _is_table_name = pandas_sql.has_table(sql)
    except Exception:
        # using generic exception to catch errors from sql drivers (GH24988)
        _is_table_name = False

    if _is_table_name:
        pandas_sql.meta.reflect(bind=pandas_sql.connectable, only=[sql])
        return pandas_sql.read_table(
            sql,
            index_col=index_col,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            columns=columns,
            chunksize=chunksize,
        )
    else:
        return pandas_sql.read_query(
            sql,
            index_col=index_col,
            params=params,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            chunksize=chunksize,
        )
parse_dates=["date_column"])528int_column date_column5290 0 2012-10-115301 1 2010-12-11531532The ``parse_dates`` argument calls ``pd.to_datetime`` on the provided columns.533Custom argument values for applying ``pd.to_datetime`` on a column are specified534via a dictionary format:5351. Ignore errors while parsing the values of "date_column"536537>>> pd.read_sql('SELECT int_column, date_column FROM test_data',538... conn,539... parse_dates={"date_column": {"errors": "ignore"}})540int_column date_column5410 0 2012-10-115421 1 2010-12-115435442. Apply a dayfirst date parsing order on the values of "date_column"545546>>> pd.read_sql('SELECT int_column, date_column FROM test_data',547... conn,548... parse_dates={"date_column": {"dayfirst": True}})549int_column date_column5500 0 2012-11-105511 1 2010-11-125525533. Apply custom formatting when date parsing the values of "date_column"554555>>> pd.read_sql('SELECT int_column, date_column FROM test_data',556... conn,557... parse_dates={"date_column": {"format": "%d/%m/%y"}})558int_column date_column5590 0 2012-11-105601 1 2010-11-12561"""562pandas_sql = pandasSQL_builder(con)563564if isinstance(pandas_sql, SQLiteDatabase):565return pandas_sql.read_query(566sql,567index_col=index_col,568params=params,569coerce_float=coerce_float,570parse_dates=parse_dates,571chunksize=chunksize,572)573574try:575_is_table_name = pandas_sql.has_table(sql)576except Exception:577# using generic exception to catch errors from sql drivers (GH24988)578_is_table_name = False579580if _is_table_name:581pandas_sql.meta.reflect(bind=pandas_sql.connectable, only=[sql])582return pandas_sql.read_table(583sql,584index_col=index_col,585coerce_float=coerce_float,586parse_dates=parse_dates,587columns=columns,588chunksize=chunksize,589)590else:591return pandas_sql.read_query(592sql,593index_col=index_col,594params=params,595coerce_float=coerce_float,596parse_dates=parse_dates,597chunksize=chunksize,598)599600601def to_sql(602frame,603name: str,604con,605schema: 
str | None = None,606if_exists: str = "fail",607index: bool = True,608index_label=None,609chunksize: int | None = None,610dtype: DtypeArg | None = None,611method: str | None = None,612engine: str = "auto",613**engine_kwargs,614) -> int | None:615"""616Write records stored in a DataFrame to a SQL database.617618Parameters619----------620frame : DataFrame, Series621name : str622Name of SQL table.623con : SQLAlchemy connectable(engine/connection) or database string URI624or sqlite3 DBAPI2 connection625Using SQLAlchemy makes it possible to use any DB supported by that626library.627If a DBAPI2 object, only sqlite3 is supported.628schema : str, optional629Name of SQL schema in database to write to (if database flavor630supports this). If None, use default schema (default).631if_exists : {'fail', 'replace', 'append'}, default 'fail'632- fail: If table exists, do nothing.633- replace: If table exists, drop it, recreate it, and insert data.634- append: If table exists, insert data. Create if does not exist.635index : bool, default True636Write DataFrame index as a column.637index_label : str or sequence, optional638Column label for index column(s). If None is given (default) and639`index` is True, then the index names are used.640A sequence should be given if the DataFrame uses MultiIndex.641chunksize : int, optional642Specify the number of rows in each batch to be written at a time.643By default, all rows will be written at once.644dtype : dict or scalar, optional645Specifying the datatype for columns. If a dictionary is used, the646keys should be the column names and the values should be the647SQLAlchemy types or strings for the sqlite3 fallback mode. 
If a648scalar is provided, it will be applied to all columns.649method : {None, 'multi', callable}, optional650Controls the SQL insertion clause used:651652- None : Uses standard SQL ``INSERT`` clause (one per row).653- ``'multi'``: Pass multiple values in a single ``INSERT`` clause.654- callable with signature ``(pd_table, conn, keys, data_iter) -> int | None``.655656Details and a sample callable implementation can be found in the657section :ref:`insert method <io.sql.method>`.658engine : {'auto', 'sqlalchemy'}, default 'auto'659SQL engine library to use. If 'auto', then the option660``io.sql.engine`` is used. The default ``io.sql.engine``661behavior is 'sqlalchemy'662663.. versionadded:: 1.3.0664665**engine_kwargs666Any additional kwargs are passed to the engine.667668Returns669-------670None or int671Number of rows affected by to_sql. None is returned if the callable672passed into ``method`` does not return the number of rows.673674.. versionadded:: 1.4.0675676Notes677-----678The returned rows affected is the sum of the ``rowcount`` attribute of ``sqlite3.Cursor``679or SQLAlchemy connectable. 
def has_table(table_name: str, con, schema: str | None = None):
    """
    Check if DataBase has named table.

    Parameters
    ----------
    table_name: string
        Name of SQL table.
    con: SQLAlchemy connectable(engine/connection) or sqlite3 DBAPI2 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library.
        If a DBAPI2 object, only sqlite3 is supported.
    schema : string, default None
        Name of SQL schema in database to write to (if database flavor supports
        this). If None, use default schema (default).

    Returns
    -------
    boolean
    """
    pandas_sql = pandasSQL_builder(con, schema=schema)
    return pandas_sql.has_table(table_name)
table_exists = has_table


def pandasSQL_builder(con, schema: str | None = None):
    """
    Convenience function to return the correct PandasSQL subclass based on the
    provided parameters.
    """
    import sqlite3
    import warnings

    if isinstance(con, sqlite3.Connection) or con is None:
        return SQLiteDatabase(con)

    sqlalchemy = import_optional_dependency("sqlalchemy", errors="ignore")

    if isinstance(con, str):
        if sqlalchemy is None:
            raise ImportError("Using URI string without sqlalchemy installed.")
        else:
            con = sqlalchemy.create_engine(con)

    if sqlalchemy is not None and isinstance(con, sqlalchemy.engine.Connectable):
        return SQLDatabase(con, schema=schema)

    warnings.warn(
        "pandas only support SQLAlchemy connectable(engine/connection) or"
        "database string URI or sqlite3 DBAPI2 connection"
        "other DBAPI2 objects are not tested, please consider using SQLAlchemy",
        UserWarning,
    )
    return SQLiteDatabase(con)


class SQLTable(PandasObject):
    """
    For mapping Pandas tables to SQL tables.
    Uses fact that table is reflected by SQLAlchemy to
    do better type conversions.
    Also holds various flags needed to avoid having to
    pass them between functions all the time.
    """

    # TODO: support for multiIndex

    def __init__(
        self,
        name: str,
        pandas_sql_engine,
        frame=None,
        index=True,
        if_exists="fail",
        prefix="pandas",
        index_label=None,
        schema=None,
        keys=None,
        dtype: DtypeArg | None = None,
    ):
        self.name = name
        self.pd_sql = pandas_sql_engine
        self.prefix = prefix
        self.frame = frame
        self.index = self._index_name(index, index_label)
        self.schema = schema
        self.if_exists = if_exists
        self.keys = keys
        self.dtype = dtype

        if frame is not None:
            # We want to initialize based on a dataframe
            self.table = self._create_table_setup()
        else:
            # no data provided, read-only mode
            self.table = self.pd_sql.get_table(self.name, self.schema)

        if self.table is None:
            raise ValueError(f"Could not init table '{name}'")

    def exists(self):
        return self.pd_sql.has_table(self.name, self.schema)

    def sql_schema(self):
        from sqlalchemy.schema import CreateTable

        return str(CreateTable(self.table).compile(self.pd_sql.connectable))

    def _execute_create(self):
        # Inserting table into database, add to MetaData object
        if _gt14():
            self.table = self.table.to_metadata(self.pd_sql.meta)
        else:
            self.table = self.table.tometadata(self.pd_sql.meta)
        self.table.create(bind=self.pd_sql.connectable)

    def create(self):
        if self.exists():
            if self.if_exists == "fail":
                raise ValueError(f"Table '{self.name}' already exists.")
            elif self.if_exists == "replace":
                self.pd_sql.drop_table(self.name, self.schema)
                self._execute_create()
            elif self.if_exists == "append":
                pass
            else:
                raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
        else:
            self._execute_create()

    def _execute_insert(self, conn, keys: list[str], data_iter) -> int:
        """
        Execute SQL statement inserting data

        Parameters
        ----------
        conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
        keys : list of str
            Column names
        data_iter : generator of list
            Each item contains a list of values to be inserted
        """
        data = [dict(zip(keys, row)) for row in data_iter]
        result = conn.execute(self.table.insert(), data)
        return result.rowcount

    def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int:
        """
        Alternative to _execute_insert for DBs support multivalue INSERT.

        Note: multi-value insert is usually faster for analytics DBs
        and tables containing a few columns
        but performance degrades quickly with increase of columns.
        """

        from sqlalchemy import insert

        data = [dict(zip(keys, row)) for row in data_iter]
        stmt = insert(self.table).values(data)
        result = conn.execute(stmt)
        return result.rowcount

    def insert_data(self):
        if self.index is not None:
            temp = self.frame.copy()
            temp.index.names = self.index
            try:
                temp.reset_index(inplace=True)
            except ValueError as err:
                raise ValueError(f"duplicate name in index/columns: {err}") from err
        else:
            temp = self.frame

        column_names = list(map(str, temp.columns))
        ncols = len(column_names)
        data_list = [None] * ncols

        for i, (_, ser) in enumerate(temp.items()):
            vals = ser._values
            if vals.dtype.kind == "M":
                d = vals.to_pydatetime()
            elif vals.dtype.kind == "m":
                # store as integers, see GH#6921, GH#7076
                d = vals.view("i8").astype(object)
            else:
                d = vals.astype(object)

            assert isinstance(d, np.ndarray), type(d)

            if ser._can_hold_na:
                # Note: this will miss timedeltas since they are converted to int
                mask = isna(d)
                d[mask] = None

            # error: No overload variant of "__setitem__" of "list" matches
            # argument types "int", "ndarray"
            data_list[i] = d  # type: ignore[call-overload]

        return column_names, data_list

    def insert(
        self, chunksize: int | None = None, method: str | None = None
    ) -> int | None:

        # set insert method
        if method is None:
            exec_insert = self._execute_insert
        elif method == "multi":
            exec_insert = self._execute_insert_multi
        elif callable(method):
            exec_insert = partial(method, self)
        else:
            raise ValueError(f"Invalid parameter `method`: {method}")

        keys, data_list = self.insert_data()

        nrows = len(self.frame)

        if nrows == 0:
            return 0

        if chunksize is None:
            chunksize = nrows
        elif chunksize == 0:
            raise ValueError("chunksize argument should be non-zero")

        chunks = (nrows // chunksize) + 1
        total_inserted = 0
        with self.pd_sql.run_transaction() as conn:
            for i in range(chunks):
                start_i = i * chunksize
                end_i = min((i + 1) * chunksize, nrows)
                if start_i >= end_i:
                    break

                chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list))
                num_inserted = exec_insert(conn, keys, chunk_iter)
                # A user-supplied ``method`` may return None for a chunk;
                # once any chunk reports None the total is unknown.  Guard
                # against ``None += int`` when a later chunk returns a count.
                if num_inserted is None:
                    total_inserted = None
                elif total_inserted is not None:
                    total_inserted += num_inserted
        return total_inserted

    def _query_iterator(
        self,
        result,
        chunksize: int | None,
        columns,
        coerce_float: bool = True,
        parse_dates=None,
    ):
        """Return generator through chunked result set."""
        has_read_data = False
        while True:
            data = result.fetchmany(chunksize)
            if not data:
                if not has_read_data:
                    yield DataFrame.from_records(
                        [], columns=columns, coerce_float=coerce_float
                    )
                break
            else:
                has_read_data = True
                self.frame = DataFrame.from_records(
                    data, columns=columns, coerce_float=coerce_float
                )

                self._harmonize_columns(parse_dates=parse_dates)

                if self.index is not None:
                    self.frame.set_index(self.index, inplace=True)

                yield self.frame

    def read(self, coerce_float=True, parse_dates=None, columns=None, chunksize=None):
        from sqlalchemy import select

        if columns is not None and len(columns) > 0:
            cols = [self.table.c[n] for n in columns]
            if self.index is not None:
                for idx in self.index[::-1]:
                    cols.insert(0, self.table.c[idx])
            sql_select = select(*cols) if _gt14() else select(cols)
        else:
            sql_select = select(self.table) if _gt14() else self.table.select()

        result = self.pd_sql.execute(sql_select)
        column_names = result.keys()

        if chunksize is not None:
            return self._query_iterator(
                result,
                chunksize,
                column_names,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
            )
        else:
            data = result.fetchall()
            self.frame = DataFrame.from_records(
                data, columns=column_names, coerce_float=coerce_float
            )

            self._harmonize_columns(parse_dates=parse_dates)

            if self.index is not None:
                self.frame.set_index(self.index, inplace=True)

            return self.frame

    def _index_name(self, index, index_label):
        # for writing: index=True to include index in sql table
        if index is True:
            nlevels = self.frame.index.nlevels
            # if index_label is specified, set this as index name(s)
            if index_label is not None:
                if not isinstance(index_label, list):
                    index_label = [index_label]
                if len(index_label) != nlevels:
                    raise ValueError(
                        "Length of 'index_label' should match number of "
                        f"levels, which is {nlevels}"
                    )
                else:
                    return index_label
            # return the used column labels for the index columns
            if (
                nlevels == 1
                and "index" not in self.frame.columns
                and self.frame.index.name is None
            ):
                return ["index"]
            else:
                return com.fill_missing_names(self.frame.index.names)

        # for reading: index=(list of) string to specify column to set as index
        elif isinstance(index, str):
            return [index]
        elif isinstance(index, list):
            return index
        else:
            return None

    def _get_column_names_and_types(self, dtype_mapper):
        column_names_and_types = []
        if self.index is not None:
            for i, idx_label in enumerate(self.index):
                idx_type = dtype_mapper(self.frame.index._get_level_values(i))
                column_names_and_types.append((str(idx_label), idx_type, True))

        column_names_and_types += [
            (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False)
            for i in range(len(self.frame.columns))
        ]

        return column_names_and_types

    def _create_table_setup(self):
        from sqlalchemy import (
            Column,
            PrimaryKeyConstraint,
            Table,
        )
        from sqlalchemy.schema import MetaData

        column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type)

        columns = [
            Column(name, typ, index=is_index)
            for name, typ, is_index in column_names_and_types
        ]

        if self.keys is not None:
            if not is_list_like(self.keys):
                keys = [self.keys]
            else:
                keys = self.keys
            pkc = PrimaryKeyConstraint(*keys, name=self.name + "_pk")
            columns.append(pkc)

        schema = self.schema or self.pd_sql.meta.schema

        # At this point, attach to new metadata, only attach to self.meta
        # once table is created.
        meta = MetaData()
        return Table(self.name, meta, *columns, schema=schema)

    def _harmonize_columns(self, parse_dates=None):
        """
        Make the DataFrame's column types align with the SQL table
        column types.
        Need to work around limited NA value support. Floats are always
        fine, ints must always be floats if there are Null values.
        Booleans are hard because converting bool column with None replaces
        all Nones with false. Therefore only convert bool if there are no
        NA values.
        Datetimes should already be converted to np.datetime64 if supported,
        but here we also force conversion if required.
        """
        parse_dates = _process_parse_dates_argument(parse_dates)

        for sql_col in self.table.columns:
            col_name = sql_col.name
            try:
                df_col = self.frame[col_name]

                # Handle date parsing upfront; don't try to convert columns
                # twice
                if col_name in parse_dates:
                    try:
                        fmt = parse_dates[col_name]
                    except TypeError:
                        fmt = None
                    self.frame[col_name] = _handle_date_column(df_col, format=fmt)
                    continue

                # the type the dataframe column should have
                col_type = self._get_dtype(sql_col.type)

                if (
                    col_type is datetime
                    or col_type is date
                    or col_type is DatetimeTZDtype
                ):
                    # Convert tz-aware Datetime SQL columns to UTC
                    utc = col_type is DatetimeTZDtype
                    self.frame[col_name] = _handle_date_column(df_col, utc=utc)
                elif col_type is float:
                    # floats support NA, can always convert!
                    self.frame[col_name] = df_col.astype(col_type, copy=False)

                elif len(df_col) == df_col.count():
                    # No NA values, can convert ints and bools
                    if col_type is np.dtype("int64") or col_type is bool:
                        self.frame[col_name] = df_col.astype(col_type, copy=False)
            except KeyError:
                pass  # this column not in results

    def _sqlalchemy_type(self, col):

        dtype: DtypeArg = self.dtype or {}
        if is_dict_like(dtype):
            dtype = cast(dict, dtype)
            if col.name in dtype:
                return dtype[col.name]

        # Infer type of column, while ignoring missing values.
        # Needed for inserting typed data containing NULLs, GH 8778.
        col_type = lib.infer_dtype(col, skipna=True)

        from sqlalchemy.types import (
            TIMESTAMP,
            BigInteger,
            Boolean,
            Date,
            DateTime,
            Float,
            Integer,
            SmallInteger,
            Text,
            Time,
        )

        if col_type == "datetime64" or col_type == "datetime":
            # GH 9086: TIMESTAMP is the suggested type if the column contains
            # timezone information
            try:
                if col.dt.tz is not None:
                    return TIMESTAMP(timezone=True)
            except AttributeError:
                # The column is actually a DatetimeIndex
                # GH 26761 or an Index with date-like data e.g. 9999-01-01
                if getattr(col, "tz", None) is not None:
                    return TIMESTAMP(timezone=True)
            return DateTime
        if col_type == "timedelta64":
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the database.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
            return BigInteger
        elif col_type == "floating":
            if col.dtype == "float32":
                return Float(precision=23)
            else:
                return Float(precision=53)
        elif col_type == "integer":
            # GH35076 Map pandas integer to optimal SQLAlchemy integer type
            if col.dtype.name.lower() in ("int8", "uint8", "int16"):
                return SmallInteger
            elif col.dtype.name.lower() in ("uint16", "int32"):
                return Integer
            elif col.dtype.name.lower() == "uint64":
                raise ValueError("Unsigned 64 bit integer datatype is not supported")
            else:
                return BigInteger
        elif col_type == "boolean":
            return Boolean
        elif col_type == "date":
            return Date
        elif col_type == "time":
            return Time
        elif col_type == "complex":
            raise ValueError("Complex datatypes not supported")

        return Text

    def _get_dtype(self, sqltype):
        from sqlalchemy.types import (
            TIMESTAMP,
            Boolean,
            Date,
            DateTime,
            Float,
            Integer,
        )

        if isinstance(sqltype, Float):
            return float
        elif isinstance(sqltype, Integer):
            # TODO: Refine integer size.
            return np.dtype("int64")
        elif isinstance(sqltype, TIMESTAMP):
            # we have a timezone capable type
            if not sqltype.timezone:
                return datetime
            return DatetimeTZDtype
        elif isinstance(sqltype, DateTime):
            # Caution: np.datetime64 is also a subclass of np.number.
            return datetime
        elif isinstance(sqltype, Date):
            return date
        elif isinstance(sqltype, Boolean):
            return bool
        return object
(1224TIMESTAMP,1225Boolean,1226Date,1227DateTime,1228Float,1229Integer,1230)12311232if isinstance(sqltype, Float):1233return float1234elif isinstance(sqltype, Integer):1235# TODO: Refine integer size.1236return np.dtype("int64")1237elif isinstance(sqltype, TIMESTAMP):1238# we have a timezone capable type1239if not sqltype.timezone:1240return datetime1241return DatetimeTZDtype1242elif isinstance(sqltype, DateTime):1243# Caution: np.datetime64 is also a subclass of np.number.1244return datetime1245elif isinstance(sqltype, Date):1246return date1247elif isinstance(sqltype, Boolean):1248return bool1249return object125012511252class PandasSQL(PandasObject):1253"""1254Subclasses Should define read_sql and to_sql.1255"""12561257def read_sql(self, *args, **kwargs):1258raise ValueError(1259"PandasSQL must be created with an SQLAlchemy "1260"connectable or sqlite connection"1261)12621263def to_sql(1264self,1265frame,1266name,1267if_exists="fail",1268index=True,1269index_label=None,1270schema=None,1271chunksize=None,1272dtype: DtypeArg | None = None,1273method=None,1274) -> int | None:1275raise ValueError(1276"PandasSQL must be created with an SQLAlchemy "1277"connectable or sqlite connection"1278)127912801281class BaseEngine:1282def insert_records(1283self,1284table: SQLTable,1285con,1286frame,1287name,1288index=True,1289schema=None,1290chunksize=None,1291method=None,1292**engine_kwargs,1293) -> int | None:1294"""1295Inserts data into already-prepared table1296"""1297raise AbstractMethodError(self)129812991300class SQLAlchemyEngine(BaseEngine):1301def __init__(self):1302import_optional_dependency(1303"sqlalchemy", extra="sqlalchemy is required for SQL support."1304)13051306def insert_records(1307self,1308table: SQLTable,1309con,1310frame,1311name,1312index=True,1313schema=None,1314chunksize=None,1315method=None,1316**engine_kwargs,1317) -> int | None:1318from sqlalchemy import exc13191320try:1321return table.insert(chunksize=chunksize, method=method)1322except 
def get_engine(engine: str) -> BaseEngine:
    """return our implementation"""
    if engine == "auto":
        engine = get_option("io.sql.engine")

    if engine == "auto":
        # try engines in this order
        engine_classes = [SQLAlchemyEngine]

        error_msgs = ""
        for engine_class in engine_classes:
            try:
                return engine_class()
            except ImportError as err:
                error_msgs += "\n - " + str(err)

        raise ImportError(
            "Unable to find a usable engine; "
            "tried using: 'sqlalchemy'.\n"
            "A suitable version of "
            "sqlalchemy is required for sql I/O "
            "support.\n"
            "Trying to import the above resulted in these errors:"
            f"{error_msgs}"
        )

    elif engine == "sqlalchemy":
        return SQLAlchemyEngine()

    raise ValueError("engine must be one of 'auto', 'sqlalchemy'")


class SQLDatabase(PandasSQL):
    """
    This class enables conversion between DataFrame and SQL databases
    using SQLAlchemy to handle DataBase abstraction.

    Parameters
    ----------
    engine : SQLAlchemy connectable
        Connectable to connect with the database. Using SQLAlchemy makes it
        possible to use any DB supported by that library.
    schema : string, default None
        Name of SQL schema in database to write to (if database flavor
        supports this). If None, use default schema (default).

    """

    def __init__(self, engine, schema: str | None = None):
        from sqlalchemy.schema import MetaData

        self.connectable = engine
        self.meta = MetaData(schema=schema)

    @contextmanager
    def run_transaction(self):
        from sqlalchemy.engine import Engine

        if isinstance(self.connectable, Engine):
            with self.connectable.connect() as conn:
                with conn.begin():
                    yield conn
        else:
            yield self.connectable

    def execute(self, *args, **kwargs):
        """Simple passthrough to SQLAlchemy connectable"""
        return self.connectable.execution_options().execute(*args, **kwargs)

    def read_table(
        self,
        table_name: str,
        index_col: str | Sequence[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        columns=None,
        schema: str | None = None,
        chunksize: int | None = None,
    ):
        """
        Read SQL database table into a DataFrame.

        Parameters
        ----------
        table_name : str
            Name of SQL table in database.
        index_col : string, optional, default: None
            Column to set as index.
        coerce_float : bool, default True
            Attempts to convert values of non-string, non-numeric objects
            (like decimal.Decimal) to floating point. This can result in
            loss of precision.
        parse_dates : list or dict, default: None
            - List of column names to parse as dates.
            - Dict of ``{column_name: format string}`` where format string is
              strftime compatible in case of parsing string times, or is one of
              (D, s, ns, ms, us) in case of parsing integer timestamps.
            - Dict of ``{column_name: arg}``, where the arg corresponds
              to the keyword arguments of :func:`pandas.to_datetime`.
              Especially useful with databases without native Datetime support,
              such as SQLite.
        columns : list, default: None
            List of column names to select from SQL table.
        schema : string, default None
            Name of SQL schema in database to query (if database flavor
            supports this). If specified, this overwrites the default
            schema of the SQL database object.
        chunksize : int, default None
            If specified, return an iterator where `chunksize` is the number
            of rows to include in each chunk.

        Returns
        -------
        DataFrame

        See Also
        --------
        pandas.read_sql_table
        SQLDatabase.read_query

        """
        table = SQLTable(table_name, self, index=index_col, schema=schema)
        return table.read(
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            columns=columns,
            chunksize=chunksize,
        )

    @staticmethod
    def _query_iterator(
        result,
        chunksize: int,
        columns,
        index_col=None,
        coerce_float=True,
        parse_dates=None,
        dtype: DtypeArg | None = None,
    ):
        """Return generator through chunked result set"""
        has_read_data = False
        while True:
            data = result.fetchmany(chunksize)
            if not data:
                if not has_read_data:
                    # ensure an empty (but correctly shaped) frame is yielded
                    yield _wrap_result(
                        [],
                        columns,
                        index_col=index_col,
                        coerce_float=coerce_float,
                        parse_dates=parse_dates,
                    )
                break
            else:
                has_read_data = True
                yield _wrap_result(
                    data,
                    columns,
                    index_col=index_col,
                    coerce_float=coerce_float,
                    parse_dates=parse_dates,
                    dtype=dtype,
                )

    def read_query(
        self,
        sql: str,
        index_col: str | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        params=None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
    ):
        """
        Read SQL query into a DataFrame.

        Parameters
        ----------
        sql : str
            SQL query to be executed.
        index_col : string, optional, default: None
            Column name to use as index for the returned DataFrame object.
        coerce_float : bool, default True
            Attempt to convert values of non-string, non-numeric objects (like
            decimal.Decimal) to floating point, useful for SQL result sets.
        params : list, tuple or dict, optional, default: None
            List of parameters to pass to execute method. The syntax used
            to pass parameters is database driver dependent. Check your
            database driver documentation for which of the five syntax styles,
            described in PEP 249's paramstyle, is supported.
            Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
        parse_dates : list or dict, default: None
            - List of column names to parse as dates.
            - Dict of ``{column_name: format string}`` where format string is
              strftime compatible in case of parsing string times, or is one of
              (D, s, ns, ms, us) in case of parsing integer timestamps.
            - Dict of ``{column_name: arg dict}``, where the arg dict
              corresponds to the keyword arguments of
              :func:`pandas.to_datetime` Especially useful with databases
              without native Datetime support, such as SQLite.
        chunksize : int, default None
            If specified, return an iterator where `chunksize` is the number
            of rows to include in each chunk.
        dtype : Type name or dict of columns
            Data type for data or columns. E.g. np.float64 or
            {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}

            .. versionadded:: 1.3.0

        Returns
        -------
        DataFrame

        See Also
        --------
        read_sql_table : Read SQL database table into a DataFrame.
        read_sql

        """
        args = _convert_params(sql, params)

        result = self.execute(*args)
        columns = result.keys()

        if chunksize is not None:
            return self._query_iterator(
                result,
                chunksize,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
        else:
            data = result.fetchall()
            frame = _wrap_result(
                data,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
            return frame

    read_sql = read_query

    def prep_table(
        self,
        frame,
        name,
        if_exists="fail",
        index=True,
        index_label=None,
        schema=None,
        dtype: DtypeArg | None = None,
    ) -> SQLTable:
        """
        Prepares table in the database for data insertion. Creates it if needed, etc.
        """
        if dtype:
            if not is_dict_like(dtype):
                # error: Value expression in dictionary comprehension has incompatible
                # type "Union[ExtensionDtype, str, dtype[Any], Type[object],
                # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
                # Type[str], Type[float], Type[int], Type[complex], Type[bool],
                # Type[object]]]]"; expected type "Union[ExtensionDtype, str,
                # dtype[Any], Type[object]]"
                dtype = {col_name: dtype for col_name in frame}  # type: ignore[misc]
            else:
                dtype = cast(dict, dtype)

            from sqlalchemy.types import (
                TypeEngine,
                to_instance,
            )

            for col, my_type in dtype.items():
                if not isinstance(to_instance(my_type), TypeEngine):
                    raise ValueError(f"The type of {col} is not a SQLAlchemy type")

        table = SQLTable(
            name,
            self,
            frame=frame,
            index=index,
            if_exists=if_exists,
            index_label=index_label,
            schema=schema,
            dtype=dtype,
        )
        table.create()
        return table

    def check_case_sensitive(
        self,
        name,
        schema,
    ):
        """
        Checks table name for issues with case-sensitivity.
        Method is called after data is inserted.
        """
        if not name.isdigit() and not name.islower():
            # check for potentially case sensitivity issues (GH7815)
            # Only check when name is not a number and name is not lower case
            engine = self.connectable.engine
            with self.connectable.connect() as conn:
                if _gt14():
                    from sqlalchemy import inspect

                    insp = inspect(conn)
                    table_names = insp.get_table_names(
                        schema=schema or self.meta.schema
                    )
                else:
                    table_names = engine.table_names(
                        schema=schema or self.meta.schema, connection=conn
                    )
            if name not in table_names:
                msg = (
                    f"The provided table name '{name}' is not found exactly as "
                    "such in the database after writing the table, possibly "
                    "due to case sensitivity issues. Consider using lower "
                    "case table names."
                )
                warnings.warn(msg, UserWarning)

    def to_sql(
        self,
        frame,
        name,
        if_exists="fail",
        index=True,
        index_label=None,
        schema=None,
        chunksize=None,
        dtype: DtypeArg | None = None,
        method=None,
        engine="auto",
        **engine_kwargs,
    ) -> int | None:
        """
        Write records stored in a DataFrame to a SQL database.

        Parameters
        ----------
        frame : DataFrame
        name : string
            Name of SQL table.
        if_exists : {'fail', 'replace', 'append'}, default 'fail'
            - fail: If table exists, do nothing.
            - replace: If table exists, drop it, recreate it, and insert data.
            - append: If table exists, insert data. Create if does not exist.
        index : boolean, default True
            Write DataFrame index as a column.
        index_label : string or sequence, default None
            Column label for index column(s). If None is given (default) and
            `index` is True, then the index names are used.
            A sequence should be given if the DataFrame uses MultiIndex.
        schema : string, default None
            Name of SQL schema in database to write to (if database flavor
            supports this). If specified, this overwrites the default
            schema of the SQLDatabase object.
        chunksize : int, default None
            If not None, then rows will be written in batches of this size at a
            time. If None, all rows will be written at once.
        dtype : single type or dict of column name to SQL type, default None
            Optional specifying the datatype for columns. The SQL type should
            be a SQLAlchemy type. If all columns are of the same type, one
            single value can be used.
        method : {None, 'multi', callable}, default None
            Controls the SQL insertion clause used:

            * None : Uses standard SQL ``INSERT`` clause (one per row).
            * 'multi': Pass multiple values in a single ``INSERT`` clause.
            * callable with signature ``(pd_table, conn, keys, data_iter)``.

            Details and a sample callable implementation can be found in the
            section :ref:`insert method <io.sql.method>`.
        engine : {'auto', 'sqlalchemy'}, default 'auto'
            SQL engine library to use. If 'auto', then the option
            ``io.sql.engine`` is used. The default ``io.sql.engine``
            behavior is 'sqlalchemy'

            .. versionadded:: 1.3.0

        **engine_kwargs
            Any additional kwargs are passed to the engine.
        """
        sql_engine = get_engine(engine)

        table = self.prep_table(
            frame=frame,
            name=name,
            if_exists=if_exists,
            index=index,
            index_label=index_label,
            schema=schema,
            dtype=dtype,
        )

        total_inserted = sql_engine.insert_records(
            table=table,
            con=self.connectable,
            frame=frame,
            name=name,
            index=index,
            schema=schema,
            chunksize=chunksize,
            method=method,
            **engine_kwargs,
        )

        self.check_case_sensitive(name=name, schema=schema)
        return total_inserted

    @property
    def tables(self):
        return self.meta.tables

    def has_table(self, name: str, schema: str | None = None):
        if _gt14():
            from sqlalchemy import inspect

            insp = inspect(self.connectable)
            return insp.has_table(name, schema or self.meta.schema)
        else:
            return self.connectable.run_callable(
                self.connectable.dialect.has_table, name, schema or self.meta.schema
            )

    def get_table(self, table_name: str, schema: str | None = None):
        from sqlalchemy import (
            Numeric,
            Table,
        )

        schema = schema or self.meta.schema
        tbl = Table(
            table_name, self.meta, autoload_with=self.connectable, schema=schema
        )
        for column in tbl.columns:
            if isinstance(column.type, Numeric):
                column.type.asdecimal = False
        return tbl

    def drop_table(self, table_name: str, schema: str | None = None):
        schema = schema or self.meta.schema
        if self.has_table(table_name, schema):
            self.meta.reflect(bind=self.connectable, only=[table_name], schema=schema)
            self.get_table(table_name, schema).drop(bind=self.connectable)
            self.meta.clear()

    def _create_sql_schema(
        self,
        frame: DataFrame,
        table_name: str,
        keys: list[str] | None = None,
        dtype: DtypeArg | None = None,
        schema: str | None = None,
    ):
        table = SQLTable(
            table_name,
            self,
            frame=frame,
            index=False,
            keys=keys,
            dtype=dtype,
            schema=schema,
        )
        return str(table.sql_schema())
# ---- SQL without SQLAlchemy ---
# sqlite-specific sql strings and handler class
# dictionary used for readability purposes
_SQL_TYPES = {
    "string": "TEXT",
    "floating": "REAL",
    "integer": "INTEGER",
    "datetime": "TIMESTAMP",
    "date": "DATE",
    "time": "TIME",
    "boolean": "INTEGER",
}


def _get_unicode_name(name):
    """Return *name* as a str, raising ValueError if it is not valid UTF-8."""
    try:
        uname = str(name).encode("utf-8", "strict").decode("utf-8")
    except UnicodeError as err:
        raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") from err
    return uname


def _get_valid_sqlite_name(name):
    # See https://stackoverflow.com/questions/6514274/how-do-you-escape-strings\
    # -for-sqlite-table-column-names-in-python
    # Ensure the string can be encoded as UTF-8.
    # Ensure the string does not include any NUL characters.
    # Replace all " with "".
    # Wrap the entire thing in double quotes.

    uname = _get_unicode_name(name)
    if not len(uname):
        raise ValueError("Empty table or column name specified")

    nul_index = uname.find("\x00")
    if nul_index >= 0:
        raise ValueError("SQLite identifier cannot contain NULs")
    return '"' + uname.replace('"', '""') + '"'


class SQLiteTable(SQLTable):
    """
    Patch the SQLTable for fallback support.
    Instead of a table variable just use the Create Table statement.
    """

    def __init__(self, *args, **kwargs):
        # GH 8341
        # register an adapter callable for datetime.time object
        import sqlite3

        # this will transform time(12,34,56,789) into '12:34:56.000789'
        # (this is what sqlalchemy does)
        sqlite3.register_adapter(time, lambda _: _.strftime("%H:%M:%S.%f"))
        super().__init__(*args, **kwargs)

    def sql_schema(self):
        return str(";\n".join(self.table))

    def _execute_create(self):
        with self.pd_sql.run_transaction() as conn:
            for stmt in self.table:
                conn.execute(stmt)

    def insert_statement(self, *, num_rows: int):
        names = list(map(str, self.frame.columns))
        wld = "?"  # wildcard char
        escape = _get_valid_sqlite_name

        if self.index is not None:
            for idx in self.index[::-1]:
                names.insert(0, idx)

        bracketed_names = [escape(column) for column in names]
        col_names = ",".join(bracketed_names)

        row_wildcards = ",".join([wld] * len(names))
        wildcards = ",".join([f"({row_wildcards})" for _ in range(num_rows)])
        insert_statement = (
            f"INSERT INTO {escape(self.name)} ({col_names}) VALUES {wildcards}"
        )
        return insert_statement

    def _execute_insert(self, conn, keys, data_iter) -> int:
        data_list = list(data_iter)
        conn.executemany(self.insert_statement(num_rows=1), data_list)
        return conn.rowcount

    def _execute_insert_multi(self, conn, keys, data_iter) -> int:
        data_list = list(data_iter)
        flattened_data = [x for row in data_list for x in row]
        conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data)
        return conn.rowcount

    def _create_table_setup(self):
        """
        Return a list of SQL statements that creates a table reflecting the
        structure of a DataFrame. The first entry will be a CREATE TABLE
        statement while the rest will be CREATE INDEX statements.
        """
        column_names_and_types = self._get_column_names_and_types(self._sql_type_name)
        escape = _get_valid_sqlite_name

        create_tbl_stmts = [
            escape(cname) + " " + ctype for cname, ctype, _ in column_names_and_types
        ]

        if self.keys is not None and len(self.keys):
            if not is_list_like(self.keys):
                keys = [self.keys]
            else:
                keys = self.keys
            cnames_br = ", ".join([escape(c) for c in keys])
            create_tbl_stmts.append(
                f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})"
            )
        if self.schema:
            schema_name = self.schema + "."
        else:
            schema_name = ""
        create_stmts = [
            "CREATE TABLE "
            + schema_name
            + escape(self.name)
            + " (\n"
            + ",\n  ".join(create_tbl_stmts)
            + "\n)"
        ]

        ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index]
        if len(ix_cols):
            cnames = "_".join(ix_cols)
            cnames_br = ",".join([escape(c) for c in ix_cols])
            create_stmts.append(
                "CREATE INDEX "
                + escape("ix_" + self.name + "_" + cnames)
                + "ON "
                + escape(self.name)
                + " ("
                + cnames_br
                + ")"
            )

        return create_stmts

    def _sql_type_name(self, col):
        dtype: DtypeArg = self.dtype or {}
        if is_dict_like(dtype):
            dtype = cast(dict, dtype)
            if col.name in dtype:
                return dtype[col.name]

        # Infer type of column, while ignoring missing values.
        # Needed for inserting typed data containing NULLs, GH 8778.
        col_type = lib.infer_dtype(col, skipna=True)

        if col_type == "timedelta64":
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the database.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
            col_type = "integer"

        elif col_type == "datetime64":
            col_type = "datetime"

        elif col_type == "empty":
            col_type = "string"

        elif col_type == "complex":
            raise ValueError("Complex datatypes not supported")

        if col_type not in _SQL_TYPES:
            col_type = "string"

        return _SQL_TYPES[col_type]
class SQLiteDatabase(PandasSQL):
    """
    Version of SQLDatabase to support SQLite connections (fallback without
    SQLAlchemy). This should only be used internally.

    Parameters
    ----------
    con : sqlite connection object

    """

    def __init__(self, con):
        self.con = con

    @contextmanager
    def run_transaction(self):
        cur = self.con.cursor()
        try:
            yield cur
            self.con.commit()
        except Exception:
            self.con.rollback()
            raise
        finally:
            cur.close()

    def execute(self, *args, **kwargs):
        cur = self.con.cursor()
        try:
            cur.execute(*args, **kwargs)
            return cur
        except Exception as exc:
            try:
                self.con.rollback()
            except Exception as inner_exc:  # pragma: no cover
                ex = DatabaseError(
                    f"Execution failed on sql: {args[0]}\n{exc}\nunable to rollback"
                )
                raise ex from inner_exc

            ex = DatabaseError(f"Execution failed on sql '{args[0]}': {exc}")
            raise ex from exc

    @staticmethod
    def _query_iterator(
        cursor,
        chunksize: int,
        columns,
        index_col=None,
        coerce_float: bool = True,
        parse_dates=None,
        dtype: DtypeArg | None = None,
    ):
        """Return generator through chunked result set"""
        has_read_data = False
        while True:
            data = cursor.fetchmany(chunksize)
            if type(data) == tuple:
                data = list(data)
            if not data:
                cursor.close()
                if not has_read_data:
                    yield DataFrame.from_records(
                        [], columns=columns, coerce_float=coerce_float
                    )
                break
            else:
                has_read_data = True
                yield _wrap_result(
                    data,
                    columns,
                    index_col=index_col,
                    coerce_float=coerce_float,
                    parse_dates=parse_dates,
                    dtype=dtype,
                )

    def read_query(
        self,
        sql,
        index_col=None,
        coerce_float: bool = True,
        params=None,
        parse_dates=None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
    ):

        args = _convert_params(sql, params)
        cursor = self.execute(*args)
        columns = [col_desc[0] for col_desc in cursor.description]

        if chunksize is not None:
            return self._query_iterator(
                cursor,
                chunksize,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
        else:
            data = self._fetchall_as_list(cursor)
            cursor.close()

            frame = _wrap_result(
                data,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
            return frame

    def _fetchall_as_list(self, cur):
        result = cur.fetchall()
        if not isinstance(result, list):
            result = list(result)
        return result

    def to_sql(
        self,
        frame,
        name,
        if_exists="fail",
        index=True,
        index_label=None,
        schema=None,
        chunksize=None,
        dtype: DtypeArg | None = None,
        method=None,
        **kwargs,
    ) -> int | None:
        """
        Write records stored in a DataFrame to a SQL database.

        Parameters
        ----------
        frame: DataFrame
        name: string
            Name of SQL table.
        if_exists: {'fail', 'replace', 'append'}, default 'fail'
            fail: If table exists, do nothing.
            replace: If table exists, drop it, recreate it, and insert data.
            append: If table exists, insert data. Create if it does not exist.
        index : bool, default True
            Write DataFrame index as a column
        index_label : string or sequence, default None
            Column label for index column(s). If None is given (default) and
            `index` is True, then the index names are used.
            A sequence should be given if the DataFrame uses MultiIndex.
        schema : string, default None
            Ignored parameter included for compatibility with SQLAlchemy
            version of ``to_sql``.
        chunksize : int, default None
            If not None, then rows will be written in batches of this
            size at a time. If None, all rows will be written at once.
        dtype : single type or dict of column name to SQL type, default None
            Optional specifying the datatype for columns. The SQL type should
            be a string. If all columns are of the same type, one single value
            can be used.
        method : {None, 'multi', callable}, default None
            Controls the SQL insertion clause used:

            * None : Uses standard SQL ``INSERT`` clause (one per row).
            * 'multi': Pass multiple values in a single ``INSERT`` clause.
            * callable with signature ``(pd_table, conn, keys, data_iter)``.

            Details and a sample callable implementation can be found in the
            section :ref:`insert method <io.sql.method>`.
        """
        if dtype:
            if not is_dict_like(dtype):
                # error: Value expression in dictionary comprehension has incompatible
                # type "Union[ExtensionDtype, str, dtype[Any], Type[object],
                # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
                # Type[str], Type[float], Type[int], Type[complex], Type[bool],
                # Type[object]]]]"; expected type "Union[ExtensionDtype, str,
                # dtype[Any], Type[object]]"
                dtype = {col_name: dtype for col_name in frame}  # type: ignore[misc]
            else:
                dtype = cast(dict, dtype)

            for col, my_type in dtype.items():
                if not isinstance(my_type, str):
                    raise ValueError(f"{col} ({my_type}) not a string")

        table = SQLiteTable(
            name,
            self,
            frame=frame,
            index=index,
            if_exists=if_exists,
            index_label=index_label,
            dtype=dtype,
        )
        table.create()
        return table.insert(chunksize, method)

    def has_table(self, name: str, schema: str | None = None):

        wld = "?"
        query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};"

        return len(self.execute(query, [name]).fetchall()) > 0

    def get_table(self, table_name: str, schema: str | None = None):
        return None  # not supported in fallback mode

    def drop_table(self, name: str, schema: str | None = None):
        drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
        self.execute(drop_sql)

    def _create_sql_schema(
        self,
        frame,
        table_name: str,
        keys=None,
        dtype: DtypeArg | None = None,
        schema: str | None = None,
    ):
        table = SQLiteTable(
            table_name,
            self,
            frame=frame,
            index=False,
            keys=keys,
            dtype=dtype,
            schema=schema,
        )
        return str(table.sql_schema())


def get_schema(
    frame,
    name: str,
    keys=None,
    con=None,
    dtype: DtypeArg | None = None,
    schema: str | None = None,
):
    """
    Get the SQL db table schema for the given frame.

    Parameters
    ----------
    frame : DataFrame
    name : str
        name of SQL table
    keys : string or sequence, default: None
        columns to use a primary key
    con: an open SQL database connection object or a SQLAlchemy connectable
        Using SQLAlchemy makes it possible to use any DB supported by that
        library, default: None
        If a DBAPI2 object, only sqlite3 is supported.
    dtype : dict of column name to SQL type, default None
        Optional specifying the datatype for columns. The SQL type should
        be a SQLAlchemy type, or a string for sqlite3 fallback connection.
    schema: str, default: None
        Optional specifying the schema to be used in creating the table.

        .. versionadded:: 1.2.0
    """
    pandas_sql = pandasSQL_builder(con=con)
    return pandas_sql._create_sql_schema(
        frame, name, keys=keys, dtype=dtype, schema=schema
    )