Path: blob/main/singlestoredb/converters.py
469 views
#!/usr/bin/env python1"""Data value conversion utilities."""2import datetime3import re4import struct5from base64 import b64decode6from decimal import Decimal7from json import loads as json_loads8from typing import Any9from typing import Callable10from typing import Dict11from typing import List12from typing import Optional13from typing import Set14from typing import Union1516try:17import shapely.wkt18has_shapely = True19except ImportError:20has_shapely = False2122try:23import pygeos24has_pygeos = True25except (AttributeError, ImportError):26has_pygeos = False2728try:29import numpy30has_numpy = True31except ImportError:32has_numpy = False3334try:35import bson36has_bson = True37except ImportError:38has_bson = False394041# Cache fromisoformat methods if they exist42# Cache fromisoformat methods if they exist43_dt_datetime_fromisoformat = None44if hasattr(datetime.datetime, 'fromisoformat'):45_dt_datetime_fromisoformat = datetime.datetime.fromisoformat # type: ignore46_dt_time_fromisoformat = None47if hasattr(datetime.time, 'fromisoformat'):48_dt_time_fromisoformat = datetime.time.fromisoformat # type: ignore49_dt_date_fromisoformat = None50if hasattr(datetime.date, 'fromisoformat'):51_dt_date_fromisoformat = datetime.date.fromisoformat # type: ignore525354def _convert_second_fraction(s: str) -> int:55if not s:56return 057# Pad zeros to ensure the fraction length in microseconds58s = s.ljust(6, '0')59return int(s[:6])606162DATETIME_RE = re.compile(63r'(\d{1,4})-(\d{1,2})-(\d{1,2})[T ](\d{1,2}):(\d{1,2}):(\d{1,2})(?:.(\d{1,6}))?',64)6566ZERO_DATETIMES = set([67'0000-00-00 00:00:00',68'0000-00-00 00:00:00.000',69'0000-00-00 00:00:00.000000',70'0000-00-00T00:00:00',71'0000-00-00T00:00:00.000',72'0000-00-00T00:00:00.000000',73])74ZERO_DATES = set([75'0000-00-00',76])777879def datetime_fromisoformat(80obj: Union[str, bytes, bytearray],81) -> Union[datetime.datetime, str, None]:82"""Returns a DATETIME or TIMESTAMP column value as a datetime object:8384>>> datetime_fromisoformat('2007-02-25 23:06:20')85datetime.datetime(2007, 2, 25, 23, 6, 20)86>>> datetime_fromisoformat('2007-02-25T23:06:20')87datetime.datetime(2007, 2, 25, 23, 6, 20)8889Illegal values are returned as str or None:9091>>> datetime_fromisoformat('2007-02-31T23:06:20')92'2007-02-31T23:06:20'93>>> datetime_fromisoformat('0000-00-00 00:00:00')94None9596"""97if isinstance(obj, (bytes, bytearray)):98obj = obj.decode('ascii')99100if obj in ZERO_DATETIMES:101return None102103# Use datetime methods if possible104if _dt_datetime_fromisoformat is not None:105try:106if ' ' in obj or 'T' in obj:107return _dt_datetime_fromisoformat(obj)108if _dt_date_fromisoformat is not None:109date = _dt_date_fromisoformat(obj)110return datetime.datetime(date.year, date.month, date.day)111except ValueError:112return obj113114m = DATETIME_RE.match(obj)115if not m:116mdate = date_fromisoformat(obj)117if type(mdate) is str:118return mdate119return datetime.datetime(mdate.year, mdate.month, mdate.day) # type: ignore120121try:122groups = list(m.groups())123groups[-1] = _convert_second_fraction(groups[-1])124return datetime.datetime(*[int(x) for x in groups]) # type: ignore125except ValueError:126mdate = date_fromisoformat(obj)127if type(mdate) is str:128return mdate129return datetime.datetime(mdate.year, mdate.month, mdate.day) # type: ignore130131132TIMEDELTA_RE = re.compile(r'(-)?(\d{1,3}):(\d{1,2}):(\d{1,2})(?:.(\d{1,6}))?')133134135def timedelta_fromisoformat(136obj: Union[str, bytes, bytearray],137) -> Union[datetime.timedelta, str, None]:138"""Returns a TIME column as a timedelta object:139140>>> timedelta_fromisoformat('25:06:17')141datetime.timedelta(days=1, seconds=3977)142>>> timedelta_fromisoformat('-25:06:17')143datetime.timedelta(days=-2, seconds=82423)144145Illegal values are returned as string:146147>>> timedelta_fromisoformat('random crap')148'random crap'149150Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but151can accept values as (+|-)DD HH:MM:SS. The latter format will not152be parsed correctly by this function.153"""154if isinstance(obj, (bytes, bytearray)):155obj = obj.decode('ascii')156157m = TIMEDELTA_RE.match(obj)158if not m:159return obj160161try:162groups = list(m.groups())163groups[-1] = _convert_second_fraction(groups[-1])164negate = -1 if groups[0] else 1165hours, minutes, seconds, microseconds = groups[1:]166167tdelta = (168datetime.timedelta(169hours=int(hours),170minutes=int(minutes),171seconds=int(seconds),172microseconds=int(microseconds),173)174* negate175)176return tdelta177except ValueError:178return obj179180181TIME_RE = re.compile(r'(\d{1,2}):(\d{1,2}):(\d{1,2})(?:.(\d{1,6}))?')182183184def time_fromisoformat(185obj: Union[str, bytes, bytearray],186) -> Union[datetime.time, str, None]:187"""Returns a TIME column as a time object:188189>>> time_fromisoformat('15:06:17')190datetime.time(15, 6, 17)191192Illegal values are returned as str:193194>>> time_fromisoformat('-25:06:17')195'-25:06:17'196>>> time_fromisoformat('random crap')197'random crap'198199Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but200can accept values as (+|-)DD HH:MM:SS. The latter format will not201be parsed correctly by this function.202203Also note that MySQL's TIME column corresponds more closely to204Python's timedelta and not time. However if you want TIME columns205to be treated as time-of-day and not a time offset, then you can206use set this function as the converter for FIELD_TYPE.TIME.207"""208if isinstance(obj, (bytes, bytearray)):209obj = obj.decode('ascii')210211# Use datetime methods if possible212if _dt_time_fromisoformat is not None:213try:214return _dt_time_fromisoformat(obj)215except ValueError:216return obj217218m = TIME_RE.match(obj)219if not m:220return obj221222try:223groups = list(m.groups())224groups[-1] = _convert_second_fraction(groups[-1])225hours, minutes, seconds, microseconds = groups226return datetime.time(227hour=int(hours),228minute=int(minutes),229second=int(seconds),230microsecond=int(microseconds),231)232except ValueError:233return obj234235236def date_fromisoformat(237obj: Union[str, bytes, bytearray],238) -> Union[datetime.date, str, None]:239"""Returns a DATE column as a date object:240241>>> date_fromisoformat('2007-02-26')242datetime.date(2007, 2, 26)243244Illegal values are returned as str or None:245246>>> date_fromisoformat('2007-02-31')247'2007-02-31'248>>> date_fromisoformat('0000-00-00')249None250251"""252if isinstance(obj, (bytes, bytearray)):253obj = obj.decode('ascii')254255if obj in ZERO_DATES:256return None257258# Use datetime methods if possible259if _dt_date_fromisoformat is not None:260try:261return _dt_date_fromisoformat(obj)262except ValueError:263return obj264265try:266return datetime.date(*[int(x) for x in obj.split('-', 2)])267except ValueError:268return obj269270271def identity(x: Any) -> Optional[Any]:272"""Return input value."""273return x274275276def bit_or_none(x: Any) -> Optional[bytes]:277"""278Convert value to bit.279280Parameters281----------282x : Any283Arbitrary value284285Returns286-------287int288If value can be cast to a bit289None290If input value is None291292"""293if x is None:294return None295if type(x) is str:296return b64decode(x)297return x298299300def int_or_none(x: Any) -> Optional[int]:301"""302Convert value to int.303304Parameters305----------306x : Any307Arbitrary value308309Returns310-------311int312If value can be cast to an int313None314If input value is None315316"""317if x is None:318return None319return int(x)320321322def float_or_none(x: Any) -> Optional[float]:323"""324Convert value to float.325326Parameters327----------328x : Any329Arbitrary value330331Returns332-------333float334If value can be cast to a float335None336If input value is None337338"""339if x is None:340return None341return float(x)342343344def decimal_or_none(x: Any) -> Optional[Decimal]:345"""346Convert value to decimal.347348Parameters349----------350x : Any351Arbitrary value352353Returns354-------355decimal.Decimal356If value can be cast to a decimal357None358If input value is None359360"""361if x is None:362return None363return Decimal(x)364365366def date_or_none(x: Optional[str]) -> Optional[Union[datetime.date, str]]:367"""368Convert value to a date.369370Parameters371----------372x : Any373Arbitrary value374375Returns376-------377datetime.date378If value can be cast to a date379None380If input value is None381382"""383if x is None:384return None385return date_fromisoformat(x)386387388def timedelta_or_none(x: Optional[str]) -> Optional[Union[datetime.timedelta, str]]:389"""390Convert value to a timedelta.391392Parameters393----------394x : Any395Arbitrary value396397Returns398-------399datetime.timedelta400If value can be cast to a time401None402If input value is None403404"""405if x is None:406return None407return timedelta_fromisoformat(x)408409410def time_or_none(x: Optional[str]) -> Optional[Union[datetime.time, str]]:411"""412Convert value to a time.413414Parameters415----------416x : Any417Arbitrary value418419Returns420-------421datetime.time422If value can be cast to a time423None424If input value is None425426"""427if x is None:428return None429return time_fromisoformat(x)430431432def datetime_or_none(x: Optional[str]) -> Optional[Union[datetime.datetime, str]]:433"""434Convert value to a datetime.435436Parameters437----------438x : Any439Arbitrary value440441Returns442-------443datetime.time444If value can be cast to a datetime445None446If input value is None447448"""449if x is None:450return None451return datetime_fromisoformat(x)452453454def none(x: Any) -> None:455"""456Return None.457458Parameters459----------460x : Any461Arbitrary value462463Returns464-------465None466467"""468return None469470471def json_or_none(x: Optional[str]) -> Optional[Union[Dict[str, Any], List[Any]]]:472"""473Convert JSON to dict or list.474475Parameters476----------477x : str478JSON string479480Returns481-------482dict483If JSON string contains an object484list485If JSON string contains a list486None487If input value is None488489"""490if x is None:491return None492return json_loads(x)493494495def set_or_none(x: Optional[str]) -> Optional[Set[str]]:496"""497Convert value to set of strings.498499Parameters500----------501x : str502Input string value503504Returns505-------506set of strings507If value contains a set of strings508None509If input value is None510511"""512if x is None:513return None514return set(y.strip() for y in x.split(','))515516517def geometry_or_none(x: Optional[str]) -> Optional[Any]:518"""519Convert value to geometry coordinates.520521Parameters522----------523x : Any524Geometry value525526Returns527-------528shapely object or pygeos object or str529If value is valid geometry value530None531If input value is None or empty532533"""534if x is None or not x:535return None536if has_shapely:537return shapely.wkt.loads(x)538if has_pygeos:539return pygeos.io.from_wkt(x)540return x541542543def float32_vector_json_or_none(x: Optional[str]) -> Optional[Any]:544"""545Covert value to float32 array.546547Parameters548----------549x : str or None550JSON array551552Returns553-------554float32 numpy array555If input value is not None and numpy is installed556float Python list557If input value is not None and numpy is not installed558None559If input value is None560561"""562if x is None:563return None564565if has_numpy:566return numpy.array(json_loads(x), dtype=numpy.float32)567568return map(float, json_loads(x))569570571def float32_vector_or_none(x: Optional[bytes]) -> Optional[Any]:572"""573Covert value to float32 array.574575Parameters576----------577x : bytes or None578Little-endian block of bytes.579580Returns581-------582float32 numpy array583If input value is not None and numpy is installed584float Python list585If input value is not None and numpy is not installed586None587If input value is None588589"""590if x is None:591return None592593if has_numpy:594return numpy.frombuffer(x, dtype=numpy.float32)595596return struct.unpack(f'<{len(x)//4}f', x)597598599def float64_vector_json_or_none(x: Optional[str]) -> Optional[Any]:600"""601Covert value to float64 array.602603Parameters604----------605x : str or None606JSON array607608Returns609-------610float64 numpy array611If input value is not None and numpy is installed612float Python list613If input value is not None and numpy is not installed614None615If input value is None616617"""618if x is None:619return None620621if has_numpy:622return numpy.array(json_loads(x), dtype=numpy.float64)623624return map(float, json_loads(x))625626627def float64_vector_or_none(x: Optional[bytes]) -> Optional[Any]:628"""629Covert value to float64 array.630631Parameters632----------633x : bytes or None634JSON array635636Returns637-------638float64 numpy array639If input value is not None and numpy is installed640float Python list641If input value is not None and numpy is not installed642None643If input value is None644645"""646if x is None:647return None648649if has_numpy:650return numpy.frombuffer(x, dtype=numpy.float64)651652return struct.unpack(f'<{len(x)//8}d', x)653654655def int8_vector_json_or_none(x: Optional[str]) -> Optional[Any]:656"""657Covert value to int8 array.658659Parameters660----------661x : str or None662JSON array663664Returns665-------666int8 numpy array667If input value is not None and numpy is installed668int Python list669If input value is not None and numpy is not installed670None671If input value is None672673"""674if x is None:675return None676677if has_numpy:678return numpy.array(json_loads(x), dtype=numpy.int8)679680return map(int, json_loads(x))681682683def int8_vector_or_none(x: Optional[bytes]) -> Optional[Any]:684"""685Covert value to int8 array.686687Parameters688----------689x : bytes or None690Little-endian block of bytes.691692Returns693-------694int8 numpy array695If input value is not None and numpy is installed696int Python list697If input value is not None and numpy is not installed698None699If input value is None700701"""702if x is None:703return None704705if has_numpy:706return numpy.frombuffer(x, dtype=numpy.int8)707708return struct.unpack(f'<{len(x)}b', x)709710711def int16_vector_json_or_none(x: Optional[str]) -> Optional[Any]:712"""713Covert value to int16 array.714715Parameters716----------717x : str or None718JSON array719720Returns721-------722int16 numpy array723If input value is not None and numpy is installed724int Python list725If input value is not None and numpy is not installed726None727If input value is None728729"""730if x is None:731return None732733if has_numpy:734return numpy.array(json_loads(x), dtype=numpy.int16)735736return map(int, json_loads(x))737738739def int16_vector_or_none(x: Optional[bytes]) -> Optional[Any]:740"""741Covert value to int16 array.742743Parameters744----------745x : bytes or None746Little-endian block of bytes.747748Returns749-------750int16 numpy array751If input value is not None and numpy is installed752int Python list753If input value is not None and numpy is not installed754None755If input value is None756757"""758if x is None:759return None760761if has_numpy:762return numpy.frombuffer(x, dtype=numpy.int16)763764return struct.unpack(f'<{len(x)//2}h', x)765766767def int32_vector_json_or_none(x: Optional[str]) -> Optional[Any]:768"""769Covert value to int32 array.770771Parameters772----------773x : str or None774JSON array775776Returns777-------778int32 numpy array779If input value is not None and numpy is installed780int Python list781If input value is not None and numpy is not installed782None783If input value is None784785"""786if x is None:787return None788789if has_numpy:790return numpy.array(json_loads(x), dtype=numpy.int32)791792return map(int, json_loads(x))793794795def int32_vector_or_none(x: Optional[bytes]) -> Optional[Any]:796"""797Covert value to int32 array.798799Parameters800----------801x : bytes or None802Little-endian block of bytes.803804Returns805-------806int32 numpy array807If input value is not None and numpy is installed808int Python list809If input value is not None and numpy is not installed810None811If input value is None812813"""814if x is None:815return None816817if has_numpy:818return numpy.frombuffer(x, dtype=numpy.int32)819820return struct.unpack(f'<{len(x)//4}l', x)821822823def int64_vector_json_or_none(x: Optional[str]) -> Optional[Any]:824"""825Covert value to int64 array.826827Parameters828----------829x : str or None830JSON array831832Returns833-------834int64 numpy array835If input value is not None and numpy is installed836int Python list837If input value is not None and numpy is not installed838None839If input value is None840841"""842if x is None:843return None844845if has_numpy:846return numpy.array(json_loads(x), dtype=numpy.int64)847848return map(int, json_loads(x))849850851def int64_vector_or_none(x: Optional[bytes]) -> Optional[Any]:852"""853Covert value to int64 array.854855Parameters856----------857x : bytes or None858Little-endian block of bytes.859860Returns861-------862int64 numpy array863If input value is not None and numpy is installed864int Python list865If input value is not None and numpy is not installed866None867If input value is None868869"""870if x is None:871return None872873# Bytes874if has_numpy:875return numpy.frombuffer(x, dtype=numpy.int64)876877return struct.unpack(f'<{len(x)//8}l', x)878879880def bson_or_none(x: Optional[bytes]) -> Optional[Any]:881"""882Convert a BSON value to a dictionary.883884Parameters885----------886x : bytes or None887BSON formatted bytes888889Returns890-------891dict892If input value is not None and bson package is installed893bytes894If input value is not None and bson package is not installed895None896If input value is None897898"""899if x is None:900return None901if has_bson:902return bson.decode(x)903return x904905906# Map of database types and conversion functions907converters: Dict[int, Callable[..., Any]] = {9080: decimal_or_none,9091: int_or_none,9102: int_or_none,9113: int_or_none,9124: float_or_none,9135: float_or_none,9146: none,9157: datetime_or_none,9168: int_or_none,9179: int_or_none,91810: date_or_none,91911: timedelta_or_none,92012: datetime_or_none,92113: int_or_none,92214: date_or_none,923# 15: identity,92416: bit_or_none,925245: json_or_none,926246: decimal_or_none,927# 247: identity,928248: set_or_none,929# 249: identity,930# 250: identity,931# 251: identity,932# 252: identity,933# 253: identity,934# 254: identity,935255: geometry_or_none,9361001: bson_or_none,9372001: float32_vector_json_or_none,9382002: float64_vector_json_or_none,9392003: int8_vector_json_or_none,9402004: int16_vector_json_or_none,9412005: int32_vector_json_or_none,9422006: int64_vector_json_or_none,9433001: float32_vector_or_none,9443002: float64_vector_or_none,9453003: int8_vector_or_none,9463004: int16_vector_or_none,9473005: int32_vector_or_none,9483006: int64_vector_or_none,949}950951952