Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
singlestore-labs
GitHub Repository: singlestore-labs/singlestoredb-python
Path: blob/main/singlestoredb/converters.py
469 views
1
#!/usr/bin/env python
2
"""Data value conversion utilities."""
3
import datetime
4
import re
5
import struct
6
from base64 import b64decode
7
from decimal import Decimal
8
from json import loads as json_loads
9
from typing import Any
10
from typing import Callable
11
from typing import Dict
12
from typing import List
13
from typing import Optional
14
from typing import Set
15
from typing import Union
16
17
try:
18
import shapely.wkt
19
has_shapely = True
20
except ImportError:
21
has_shapely = False
22
23
try:
24
import pygeos
25
has_pygeos = True
26
except (AttributeError, ImportError):
27
has_pygeos = False
28
29
try:
30
import numpy
31
has_numpy = True
32
except ImportError:
33
has_numpy = False
34
35
try:
36
import bson
37
has_bson = True
38
except ImportError:
39
has_bson = False
40
41
42
# Cache the ``fromisoformat`` constructors when the running Python provides
# them; each name is ``None`` otherwise, so the converters below can test
# for availability with a simple ``is not None`` check.
_dt_datetime_fromisoformat = getattr(datetime.datetime, 'fromisoformat', None)
_dt_time_fromisoformat = getattr(datetime.time, 'fromisoformat', None)
_dt_date_fromisoformat = getattr(datetime.date, 'fromisoformat', None)
53
54
55
def _convert_second_fraction(s: str) -> int:
    """
    Convert the fractional-seconds digits of a time value to microseconds.

    Parameters
    ----------
    s : str
        Digits following the decimal separator. A falsy value (empty
        string or None, as produced by an unmatched regex group) means
        there is no fractional part.

    Returns
    -------
    int
        Number of microseconds; digits beyond the sixth are dropped

    """
    # Right-pad to six digits so that e.g. '5' means 500000 microseconds
    return int(s[:6].ljust(6, '0')) if s else 0
61
62
63
# Pattern for 'YYYY-MM-DD[T ]HH:MM:SS[.ffffff]' values. The fraction
# separator is restricted to '.' or ',' (both valid ISO 8601 decimal
# marks); a bare '.' in the pattern would match *any* character and let
# trailing garbage such as '...:20x123' be parsed as microseconds.
DATETIME_RE = re.compile(
    r'(\d{1,4})-(\d{1,2})-(\d{1,2})[T ](\d{1,2}):(\d{1,2}):(\d{1,2})'
    r'(?:[.,](\d{1,6}))?',
)
66
67
# Textual "zero" datetime values; the datetime converter maps these to None
ZERO_DATETIMES = {
    '0000-00-00 00:00:00',
    '0000-00-00 00:00:00.000',
    '0000-00-00 00:00:00.000000',
    '0000-00-00T00:00:00',
    '0000-00-00T00:00:00.000',
    '0000-00-00T00:00:00.000000',
}
# Textual "zero" date values; the date converter maps these to None
ZERO_DATES = {'0000-00-00'}
78
79
80
def datetime_fromisoformat(
    obj: Union[str, bytes, bytearray],
) -> Union[datetime.datetime, str, None]:
    """Returns a DATETIME or TIMESTAMP column value as a datetime object:

      >>> datetime_fromisoformat('2007-02-25 23:06:20')
      datetime.datetime(2007, 2, 25, 23, 6, 20)
      >>> datetime_fromisoformat('2007-02-25T23:06:20')
      datetime.datetime(2007, 2, 25, 23, 6, 20)

    Illegal values are returned as str:

      >>> datetime_fromisoformat('2007-02-31T23:06:20')
      '2007-02-31T23:06:20'

    Zero datetimes are returned as None:

      >>> datetime_fromisoformat('0000-00-00 00:00:00') is None
      True

    Bytes input is decoded as ASCII before parsing. A value without a
    time part is parsed as a date and returned as midnight of that day.

    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    if obj in ZERO_DATETIMES:
        return None

    # Use datetime methods if possible
    if _dt_datetime_fromisoformat is not None:
        try:
            if ' ' in obj or 'T' in obj:
                return _dt_datetime_fromisoformat(obj)
            if _dt_date_fromisoformat is not None:
                date = _dt_date_fromisoformat(obj)
                return datetime.datetime(date.year, date.month, date.day)
        except ValueError:
            return obj

    # Manual parsing for interpreters without ``fromisoformat``
    m = DATETIME_RE.match(obj)
    if m:
        try:
            groups = list(m.groups())
            groups[-1] = _convert_second_fraction(groups[-1])
            return datetime.datetime(*[int(x) for x in groups])  # type: ignore
        except ValueError:
            # Out-of-range component; fall through to the date-only attempt
            pass

    # Date-only attempt. ``date_fromisoformat`` returns None for a zero date
    # and the original string for an illegal value; both are passed through
    # as-is (accessing ``.year`` on None used to raise AttributeError here).
    mdate = date_fromisoformat(obj)
    if mdate is None or isinstance(mdate, str):
        return mdate
    return datetime.datetime(mdate.year, mdate.month, mdate.day)
131
132
133
# Pattern for '[-]HHH:MM:SS[.ffffff]' values. The fraction separator is
# restricted to '.' or ','; an unescaped '.' would match any character.
TIMEDELTA_RE = re.compile(
    r'(-)?(\d{1,3}):(\d{1,2}):(\d{1,2})(?:[.,](\d{1,6}))?',
)
134
135
136
def timedelta_fromisoformat(
    obj: Union[str, bytes, bytearray],
) -> Union[datetime.timedelta, str, None]:
    """Returns a TIME column as a timedelta object:

      >>> timedelta_fromisoformat('25:06:17')
      datetime.timedelta(days=1, seconds=3977)
      >>> timedelta_fromisoformat('-25:06:17')
      datetime.timedelta(days=-2, seconds=82423)

    Illegal values are returned as string:

      >>> timedelta_fromisoformat('not a time')
      'not a time'

    Bytes input is decoded as ASCII before parsing.

    Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but
    can accept values as (+|-)DD HH:MM:SS. The latter format will not
    be parsed correctly by this function.
    """
    text = obj.decode('ascii') if isinstance(obj, (bytes, bytearray)) else obj

    match = TIMEDELTA_RE.match(text)
    if match is None:
        return text

    sign, hours, minutes, seconds, fraction = match.groups()
    try:
        magnitude = datetime.timedelta(
            hours=int(hours),
            minutes=int(minutes),
            seconds=int(seconds),
            microseconds=_convert_second_fraction(fraction),
        )
    except ValueError:
        return text

    # A leading '-' negates the whole duration, not just the hours
    return magnitude * (-1 if sign else 1)
180
181
182
# Pattern for 'HH:MM:SS[.ffffff]' time-of-day values. The fraction
# separator is restricted to '.' or ','; an unescaped '.' would match any
# character.
TIME_RE = re.compile(r'(\d{1,2}):(\d{1,2}):(\d{1,2})(?:[.,](\d{1,6}))?')
183
184
185
def time_fromisoformat(
    obj: Union[str, bytes, bytearray],
) -> Union[datetime.time, str, None]:
    """Returns a TIME column as a time object:

      >>> time_fromisoformat('15:06:17')
      datetime.time(15, 6, 17)

    Illegal values are returned as str:

      >>> time_fromisoformat('-25:06:17')
      '-25:06:17'
      >>> time_fromisoformat('not a time')
      'not a time'

    Bytes input is decoded as ASCII before parsing.

    Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but
    can accept values as (+|-)DD HH:MM:SS. The latter format will not
    be parsed correctly by this function.

    Also note that MySQL's TIME column corresponds more closely to
    Python's timedelta and not time. However, if you want TIME columns
    to be treated as time-of-day and not a time offset, then you can
    set this function as the converter for FIELD_TYPE.TIME.
    """
    text = obj.decode('ascii') if isinstance(obj, (bytes, bytearray)) else obj

    # Prefer the built-in parser when the interpreter has one
    if _dt_time_fromisoformat is not None:
        try:
            return _dt_time_fromisoformat(text)
        except ValueError:
            return text

    match = TIME_RE.match(text)
    if match is None:
        return text

    hours, minutes, seconds, fraction = match.groups()
    try:
        return datetime.time(
            hour=int(hours),
            minute=int(minutes),
            second=int(seconds),
            microsecond=_convert_second_fraction(fraction),
        )
    except ValueError:
        return text
235
236
237
def date_fromisoformat(
    obj: Union[str, bytes, bytearray],
) -> Union[datetime.date, str, None]:
    """Returns a DATE column as a date object:

      >>> date_fromisoformat('2007-02-26')
      datetime.date(2007, 2, 26)

    Illegal values are returned as str:

      >>> date_fromisoformat('2007-02-31')
      '2007-02-31'

    Zero dates are returned as None:

      >>> date_fromisoformat('0000-00-00') is None
      True

    Bytes input is decoded as ASCII before parsing.

    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    if obj in ZERO_DATES:
        return None

    # Use datetime methods if possible
    if _dt_date_fromisoformat is not None:
        try:
            return _dt_date_fromisoformat(obj)
        except ValueError:
            return obj

    try:
        return datetime.date(*[int(x) for x in obj.split('-', 2)])
    except (TypeError, ValueError):
        # ValueError: non-numeric or out-of-range component.
        # TypeError: fewer than three components (e.g. '2007-02'), which
        # leaves ``datetime.date`` short of required arguments.
        return obj
270
271
272
def identity(x: Any) -> Optional[Any]:
    """
    Pass a value through without conversion.

    Parameters
    ----------
    x : Any
        Arbitrary value

    Returns
    -------
    Any
        The very same object that was passed in

    """
    return x
275
276
277
def bit_or_none(x: Any) -> Optional[bytes]:
    """
    Convert a bit-field value to bytes.

    Parameters
    ----------
    x : Any
        Arbitrary value

    Returns
    -------
    bytes
        Base64-decoded content if the input is a str
    Any
        The input itself if it is any other non-None value
    None
        If input value is None

    """
    if type(x) is str:
        return b64decode(x)
    # None and non-str values are handed back untouched
    return x
299
300
301
def int_or_none(x: Any) -> Optional[int]:
    """
    Cast a value to int, letting None pass through.

    Parameters
    ----------
    x : Any
        Arbitrary value

    Returns
    -------
    int
        Result of ``int(x)`` for any non-None input
    None
        If input value is None

    """
    return None if x is None else int(x)
321
322
323
def float_or_none(x: Any) -> Optional[float]:
    """
    Cast a value to float, letting None pass through.

    Parameters
    ----------
    x : Any
        Arbitrary value

    Returns
    -------
    float
        Result of ``float(x)`` for any non-None input
    None
        If input value is None

    """
    return None if x is None else float(x)
343
344
345
def decimal_or_none(x: Any) -> Optional[Decimal]:
    """
    Cast a value to Decimal, letting None pass through.

    Parameters
    ----------
    x : Any
        Arbitrary value

    Returns
    -------
    decimal.Decimal
        Result of ``Decimal(x)`` for any non-None input
    None
        If input value is None

    """
    return None if x is None else Decimal(x)
365
366
367
def date_or_none(x: Optional[str]) -> Optional[Union[datetime.date, str]]:
    """
    Convert value to a date.

    Non-None input is handed to ``date_fromisoformat``.

    Parameters
    ----------
    x : str or None
        Date string

    Returns
    -------
    datetime.date
        If value can be cast to a date
    str
        If ``date_fromisoformat`` returns the value unparsed
    None
        If input value is None

    """
    return None if x is None else date_fromisoformat(x)
387
388
389
def timedelta_or_none(x: Optional[str]) -> Optional[Union[datetime.timedelta, str]]:
    """
    Convert value to a timedelta.

    Non-None input is handed to ``timedelta_fromisoformat``.

    Parameters
    ----------
    x : str or None
        Time string

    Returns
    -------
    datetime.timedelta
        If value can be cast to a timedelta
    str
        If ``timedelta_fromisoformat`` returns the value unparsed
    None
        If input value is None

    """
    return None if x is None else timedelta_fromisoformat(x)
409
410
411
def time_or_none(x: Optional[str]) -> Optional[Union[datetime.time, str]]:
    """
    Convert value to a time.

    Non-None input is handed to ``time_fromisoformat``.

    Parameters
    ----------
    x : str or None
        Time string

    Returns
    -------
    datetime.time
        If value can be cast to a time
    str
        If ``time_fromisoformat`` returns the value unparsed
    None
        If input value is None

    """
    return None if x is None else time_fromisoformat(x)
431
432
433
def datetime_or_none(x: Optional[str]) -> Optional[Union[datetime.datetime, str]]:
    """
    Convert value to a datetime.

    Non-None input is handed to ``datetime_fromisoformat``.

    Parameters
    ----------
    x : str or None
        Datetime string

    Returns
    -------
    datetime.datetime
        If value can be cast to a datetime
    str
        If ``datetime_fromisoformat`` returns the value unparsed
    None
        If input value is None

    """
    return None if x is None else datetime_fromisoformat(x)
453
454
455
def none(x: Any) -> None:
    """
    Discard the input and return None.

    Parameters
    ----------
    x : Any
        Arbitrary value (ignored)

    Returns
    -------
    None
        Always, regardless of the input

    """
    return None
470
471
472
def json_or_none(x: Optional[str]) -> Optional[Union[Dict[str, Any], List[Any]]]:
    """
    Parse a JSON document.

    Parameters
    ----------
    x : str or None
        JSON string

    Returns
    -------
    dict
        If JSON string contains an object
    list
        If JSON string contains a list
    None
        If input value is None

    """
    return None if x is None else json_loads(x)
494
495
496
def set_or_none(x: Optional[str]) -> Optional[Set[str]]:
    """
    Split a comma-separated string into a set of strings.

    Parameters
    ----------
    x : str or None
        Comma-separated members

    Returns
    -------
    set of strings
        Members with surrounding whitespace stripped
    None
        If input value is None

    """
    if x is None:
        return None
    return {member.strip() for member in x.split(',')}
516
517
518
def geometry_or_none(x: Optional[str]) -> Optional[Any]:
    """
    Convert a WKT geometry value to a geometry object.

    Parameters
    ----------
    x : str or None
        Geometry value

    Returns
    -------
    shapely object
        If shapely could be imported
    pygeos object
        If shapely is unavailable but pygeos could be imported
    str
        The input itself if neither package is available
    None
        If input value is None or empty

    """
    # None and the empty string are both falsy
    if not x:
        return None
    # shapely takes precedence over pygeos when both are available
    if has_shapely:
        return shapely.wkt.loads(x)
    return pygeos.io.from_wkt(x) if has_pygeos else x
542
543
544
def float32_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
    """
    Convert a JSON array to a float32 vector.

    Parameters
    ----------
    x : str or None
        JSON array

    Returns
    -------
    float32 numpy array
        If input value is not None and numpy is installed
    list of float
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    values = json_loads(x)
    if has_numpy:
        return numpy.array(values, dtype=numpy.float32)

    # Materialize the values: a bare ``map`` object is a one-shot iterator
    # without ``len()`` or indexing, not the list this function promises.
    return [float(v) for v in values]
570
571
572
def float32_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
    """
    Convert a packed block of bytes to a float32 vector.

    Parameters
    ----------
    x : bytes or None
        Block of 4-byte float values (read as little-endian when numpy
        is not installed)

    Returns
    -------
    float32 numpy array
        If input value is not None and numpy is installed
    tuple of float
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    if not has_numpy:
        # Four bytes per element
        count = len(x) // 4
        return struct.unpack('<%df' % count, x)

    return numpy.frombuffer(x, dtype=numpy.float32)
598
599
600
def float64_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
    """
    Convert a JSON array to a float64 vector.

    Parameters
    ----------
    x : str or None
        JSON array

    Returns
    -------
    float64 numpy array
        If input value is not None and numpy is installed
    list of float
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    values = json_loads(x)
    if has_numpy:
        return numpy.array(values, dtype=numpy.float64)

    # Materialize the values: a bare ``map`` object is a one-shot iterator
    # without ``len()`` or indexing, not the list this function promises.
    return [float(v) for v in values]
626
627
628
def float64_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
    """
    Convert a packed block of bytes to a float64 vector.

    Parameters
    ----------
    x : bytes or None
        Block of 8-byte float values (read as little-endian when numpy
        is not installed)

    Returns
    -------
    float64 numpy array
        If input value is not None and numpy is installed
    tuple of float
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    if not has_numpy:
        # Eight bytes per element
        count = len(x) // 8
        return struct.unpack('<%dd' % count, x)

    return numpy.frombuffer(x, dtype=numpy.float64)
654
655
656
def int8_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
    """
    Convert a JSON array to an int8 vector.

    Parameters
    ----------
    x : str or None
        JSON array

    Returns
    -------
    int8 numpy array
        If input value is not None and numpy is installed
    list of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    values = json_loads(x)
    if has_numpy:
        return numpy.array(values, dtype=numpy.int8)

    # Materialize the values: a bare ``map`` object is a one-shot iterator
    # without ``len()`` or indexing, not the list this function promises.
    return [int(v) for v in values]
682
683
684
def int8_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
    """
    Convert a packed block of bytes to an int8 vector.

    Parameters
    ----------
    x : bytes or None
        Block of 1-byte signed integer values

    Returns
    -------
    int8 numpy array
        If input value is not None and numpy is installed
    tuple of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    if not has_numpy:
        # One byte per element
        count = len(x)
        return struct.unpack('<%db' % count, x)

    return numpy.frombuffer(x, dtype=numpy.int8)
710
711
712
def int16_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
    """
    Convert a JSON array to an int16 vector.

    Parameters
    ----------
    x : str or None
        JSON array

    Returns
    -------
    int16 numpy array
        If input value is not None and numpy is installed
    list of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    values = json_loads(x)
    if has_numpy:
        return numpy.array(values, dtype=numpy.int16)

    # Materialize the values: a bare ``map`` object is a one-shot iterator
    # without ``len()`` or indexing, not the list this function promises.
    return [int(v) for v in values]
738
739
740
def int16_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
    """
    Convert a packed block of bytes to an int16 vector.

    Parameters
    ----------
    x : bytes or None
        Block of 2-byte signed integer values (read as little-endian
        when numpy is not installed)

    Returns
    -------
    int16 numpy array
        If input value is not None and numpy is installed
    tuple of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    if not has_numpy:
        # Two bytes per element
        count = len(x) // 2
        return struct.unpack('<%dh' % count, x)

    return numpy.frombuffer(x, dtype=numpy.int16)
766
767
768
def int32_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
    """
    Convert a JSON array to an int32 vector.

    Parameters
    ----------
    x : str or None
        JSON array

    Returns
    -------
    int32 numpy array
        If input value is not None and numpy is installed
    list of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    values = json_loads(x)
    if has_numpy:
        return numpy.array(values, dtype=numpy.int32)

    # Materialize the values: a bare ``map`` object is a one-shot iterator
    # without ``len()`` or indexing, not the list this function promises.
    return [int(v) for v in values]
794
795
796
def int32_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
    """
    Convert a packed block of bytes to an int32 vector.

    Parameters
    ----------
    x : bytes or None
        Block of 4-byte signed integer values (read as little-endian
        when numpy is not installed)

    Returns
    -------
    int32 numpy array
        If input value is not None and numpy is installed
    tuple of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    if not has_numpy:
        # Four bytes per element
        count = len(x) // 4
        return struct.unpack('<%dl' % count, x)

    return numpy.frombuffer(x, dtype=numpy.int32)
822
823
824
def int64_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
    """
    Convert a JSON array to an int64 vector.

    Parameters
    ----------
    x : str or None
        JSON array

    Returns
    -------
    int64 numpy array
        If input value is not None and numpy is installed
    list of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    values = json_loads(x)
    if has_numpy:
        return numpy.array(values, dtype=numpy.int64)

    # Materialize the values: a bare ``map`` object is a one-shot iterator
    # without ``len()`` or indexing, not the list this function promises.
    return [int(v) for v in values]
850
851
852
def int64_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
    """
    Convert a packed block of bytes to an int64 vector.

    Parameters
    ----------
    x : bytes or None
        Block of 8-byte signed integer values (read as little-endian
        when numpy is not installed)

    Returns
    -------
    int64 numpy array
        If input value is not None and numpy is installed
    tuple of int
        If input value is not None and numpy is not installed
    None
        If input value is None

    """
    if x is None:
        return None

    if has_numpy:
        return numpy.frombuffer(x, dtype=numpy.int64)

    # 'q' is the 8-byte signed integer code. With the '<' prefix, 'l' is
    # only 4 bytes, so a format built from len(x)//8 'l' items never
    # matches the buffer length and struct.unpack raises struct.error.
    return struct.unpack(f'<{len(x)//8}q', x)
879
880
881
def bson_or_none(x: Optional[bytes]) -> Optional[Any]:
    """
    Decode a BSON value.

    Parameters
    ----------
    x : bytes or None
        BSON formatted bytes

    Returns
    -------
    decoded document
        Result of ``bson.decode`` if input value is not None and the
        bson package is installed
    bytes
        The input itself if it is not None and the bson package is not
        installed
    None
        If input value is None

    """
    if x is None:
        return None
    return bson.decode(x) if has_bson else x
905
906
907
# Map of database types and conversion functions.
#
# Each key is an integer column type code and each value is the callable
# applied to a raw column value of that type.
# NOTE(review): the codes up to 255 look like MySQL wire-protocol field
# type codes and the 1001 / 2001-2006 / 3001-3006 ranges like vendor
# extensions — confirm against the driver's field type constants.
# Commented-out codes have no entry in this map; the ``identity``
# placeholder records that their values would be passed through as-is.
converters: Dict[int, Callable[..., Any]] = {
    0: decimal_or_none,
    1: int_or_none,
    2: int_or_none,
    3: int_or_none,
    4: float_or_none,
    5: float_or_none,
    6: none,
    7: datetime_or_none,
    8: int_or_none,
    9: int_or_none,
    10: date_or_none,
    11: timedelta_or_none,
    12: datetime_or_none,
    13: int_or_none,
    14: date_or_none,
    # 15: identity,
    16: bit_or_none,
    245: json_or_none,
    246: decimal_or_none,
    # 247: identity,
    248: set_or_none,
    # 249: identity,
    # 250: identity,
    # 251: identity,
    # 252: identity,
    # 253: identity,
    # 254: identity,
    255: geometry_or_none,
    # BSON documents
    1001: bson_or_none,
    # Vectors delivered as JSON text
    2001: float32_vector_json_or_none,
    2002: float64_vector_json_or_none,
    2003: int8_vector_json_or_none,
    2004: int16_vector_json_or_none,
    2005: int32_vector_json_or_none,
    2006: int64_vector_json_or_none,
    # Vectors delivered as packed bytes
    3001: float32_vector_or_none,
    3002: float64_vector_or_none,
    3003: int8_vector_or_none,
    3004: int16_vector_or_none,
    3005: int32_vector_or_none,
    3006: int64_vector_or_none,
}
951
952