Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Lib/_strptime.py
12 views
1
"""Strptime-related classes and functions.
2
3
CLASSES:
4
LocaleTime -- Discovers and stores locale-specific time information
5
TimeRE -- Creates regexes for pattern matching a string of text containing
6
time information
7
8
FUNCTIONS:
9
_getlang -- Figure out what language is being used for the locale
10
strptime -- Calculates the time struct represented by the passed-in string
11
12
"""
13
import time
14
import locale
15
import calendar
16
from re import compile as re_compile
17
from re import IGNORECASE
18
from re import escape as re_escape
19
from datetime import (date as datetime_date,
20
timedelta as datetime_timedelta,
21
timezone as datetime_timezone)
22
from _thread import allocate_lock as _thread_allocate_lock
23
24
# Empty on purpose: a star-import of this module exports no names.
__all__ = []
25
26
def _getlang():
27
# Figure out what the current language is set to.
28
return locale.getlocale(locale.LC_TIME)
29
30
class LocaleTime:
    """Snapshot of the locale-specific strings needed to parse times.

    ATTRIBUTES:
        f_weekday -- full weekday names, lowercased (7-item list)
        a_weekday -- abbreviated weekday names, lowercased (7-item list)
        f_month -- full month names, lowercased (13-item list; index 0 is
                   the placeholder supplied by the calendar module)
        a_month -- abbreviated month names, lowercased (13-item list; index
                   0 is the placeholder supplied by the calendar module)
        am_pm -- lowercased AM/PM strings (2-item list: [AM, PM])
        LC_date_time -- format string reconstructed from strftime("%c")
        LC_date -- format string reconstructed from strftime("%x")
        LC_time -- format string reconstructed from strftime("%X")
        tzname -- copy of time.tzname taken during initialization
        daylight -- copy of time.daylight taken during initialization
        timezone -- (non-DST names, DST names) as a 2-tuple of frozensets
        lang -- value returned by _getlang() at the start of initialization
    """

    def __init__(self):
        """Populate every attribute from the current locale and timezone.

        The helpers run in a fixed order because __calc_date_time() reads
        the attributes produced by all the others.

        The locale is recorded at the outset and compared again at the
        end, and the same is done for time.tzname/time.daylight.  If
        either changed while the attributes were being collected (for
        instance because another thread switched the locale), the data
        could be a mix of two configurations, so ValueError is raised
        instead of returning an inconsistent object.
        """
        self.lang = _getlang()
        steps = (
            self.__calc_weekday,
            self.__calc_month,
            self.__calc_am_pm,
            self.__calc_timezone,
            self.__calc_date_time,
        )
        for step in steps:
            step()
        if self.lang != _getlang():
            raise ValueError("locale changed during initialization")
        if self.tzname != time.tzname or self.daylight != time.daylight:
            raise ValueError("timezone changed during initialization")

    def __calc_weekday(self):
        """Fill a_weekday and f_weekday from the calendar module."""
        self.a_weekday = [name.lower() for name in calendar.day_abbr]
        self.f_weekday = [name.lower() for name in calendar.day_name]

    def __calc_month(self):
        """Fill a_month and f_month from the calendar module."""
        self.a_month = [name.lower() for name in calendar.month_abbr]
        self.f_month = [name.lower() for name in calendar.month_name]

    def __calc_am_pm(self):
        """Fill am_pm by formatting one morning and one evening hour."""
        # Same reference date as used elsewhere in this class; only the
        # hour differs (01h for AM, 22h for PM).
        self.am_pm = [
            time.strftime(
                "%p", time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
            ).lower()
            for hour in (1, 22)
        ]

    def __calc_date_time(self):
        """Fill LC_date_time, LC_date and LC_time.

        A known reference date is formatted with %c, %x and %X, and every
        recognisable value in the output is replaced by the directive that
        produces it.  The order of the substitutions matters: longer and
        less ambiguous values are replaced before shorter ones.
        """
        reference = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        directives = ('%c', '%x', '%X')
        formats = [time.strftime(directive, reference).lower()
                   for directive in directives]

        substitutions = [
            ('%', '%%'),
            (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'),
            (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'),
            (self.am_pm[1], '%p'),
            ('1999', '%Y'),
            ('99', '%y'),
            ('22', '%H'),
            ('44', '%M'),
            ('55', '%S'),
            ('76', '%j'),
            ('17', '%d'),
            ('03', '%m'),
            # A bare '3' covers locales that print the month without a
            # leading zero.
            ('3', '%m'),
            ('2', '%w'),
            ('10', '%I'),
        ]
        for zone_names in self.timezone:
            for zone in zone_names:
                substitutions.append((zone, "%Z"))

        # 1999-01-03 is a Sunday that precedes the first Monday of the
        # year, so a week number of '00' in the output means the locale
        # counts weeks from Monday (%W); otherwise %U is assumed.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
        for position, directive in enumerate(directives):
            template = formats[position]
            for literal, code in substitutions:
                # Locale data may be missing and show up as an empty
                # string (no AM/PM text, empty timezone names); replacing
                # '' would corrupt the format, so skip those entries.
                if literal:
                    template = template.replace(literal, code)
            if '00' in time.strftime(directive, week_probe):
                week_code = '%W'
            else:
                week_code = '%U'
            formats[position] = template.replace('11', week_code)

        self.LC_date_time, self.LC_date, self.LC_time = formats

    def __calc_timezone(self):
        """Fill tzname, daylight and timezone from the time module.

        The case where both timezone names are equal while time.daylight
        is true is not handled here; it is left to the parsing code.
        """
        try:
            time.tzset()
        except AttributeError:
            # time.tzset() is not available on every platform.
            pass
        self.tzname = time.tzname
        self.daylight = time.daylight
        standard_names = frozenset({"utc", "gmt", self.tzname[0].lower()})
        dst_names = (frozenset({self.tzname[1].lower()})
                     if self.daylight else frozenset())
        self.timezone = (standard_names, dst_names)
168
169
170
class TimeRE(dict):
    """Map strptime directive letters to the regex fragment matching them."""

    def __init__(self, locale_time=None):
        """Build the directive table.

        locale_time supplies the locale-dependent names and formats; when
        it is omitted (or falsy) a fresh LocaleTime() is created.

        The fixed and name-based directives are stored first because the
        composite directives (%c, %x, %X) are expanded through pattern(),
        which looks the other directives up in this mapping.
        """
        self.locale_time = locale_time if locale_time else LocaleTime()
        names = self.locale_time
        table = {
            # The " [1-9]" part of the regex is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'f': r"(?P<f>[0-9]{1,6})",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'G': r"(?P<G>\d\d\d\d)",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'u': r"(?P<u>[1-7])",
            'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
            # 'W' is derived from 'U' after the table is installed.
            'y': r"(?P<y>\d\d)",
            # XXX: Does 'Y' need to worry about having less or more than
            #      4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))",
            'A': self.__seqToRE(names.f_weekday, 'A'),
            'a': self.__seqToRE(names.a_weekday, 'a'),
            # Index 0 of the month lists is a placeholder, not a month.
            'B': self.__seqToRE(names.f_month[1:], 'B'),
            'b': self.__seqToRE(names.a_month[1:], 'b'),
            'p': self.__seqToRE(names.am_pm, 'p'),
            'Z': self.__seqToRE(
                [zone for zone_group in names.timezone for zone in zone_group],
                'Z'),
            '%': '%',
        }
        parent = super()
        parent.__init__(table)
        parent.__setitem__('W', parent.__getitem__('U').replace('U', 'W'))
        for key, attribute in (('c', 'LC_date_time'),
                               ('x', 'LC_date'),
                               ('X', 'LC_time')):
            parent.__setitem__(key, self.pattern(getattr(names, attribute)))

    def __seqToRE(self, to_convert, directive):
        """Return a named-group regex matching any string in to_convert.

        Alternatives are ordered from longest to shortest so that a value
        which is a prefix of another one (e.g. 'abc' vs. 'abcdef') cannot
        win the match.  If there is no non-empty string to match, an empty
        string is returned instead of a group.
        """
        candidates = sorted(to_convert, key=len, reverse=True)
        if not any(candidate != '' for candidate in candidates):
            return ''
        alternation = '|'.join(map(re_escape, candidates))
        opened = '(?P<%s>%s' % (directive, alternation)
        return opened + ')'

    def pattern(self, format):
        """Return the regex source corresponding to the format string.

        Characters with a special meaning in regexes are escaped, runs of
        whitespace match any run of whitespace, and each %-directive is
        replaced by its entry in this mapping.  An unknown directive raises
        KeyError and a trailing '%' raises IndexError.
        """
        # re.escape() cannot be used here because the '%' directives have
        # to survive the escaping step.
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        whitespace_replacement = re_compile(r'\s+')
        format = whitespace_replacement.sub(r'\\s+', format)
        pieces = []
        while '%' in format:
            literal, _, remainder = format.partition('%')
            pieces.append(literal)
            pieces.append(self[remainder[0]])
            format = remainder[1:]
        pieces.append(format)
        return ''.join(pieces)

    def compile(self, format):
        """Return a compiled, case-insensitive regex for the format string."""
        source = self.pattern(format)
        return re_compile(source, IGNORECASE)
264
265
# Max number of regexes stored in _regex_cache
_CACHE_MAX_SIZE = 5

# Lock protecting the two caches below.  DO NOT modify _TimeRE_cache or
# _regex_cache without acquiring it first!
_cache_lock = _thread_allocate_lock()
_TimeRE_cache = TimeRE()
_regex_cache = {}
271
272
def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
273
"""Calculate the Julian day based on the year, week of the year, and day of
274
the week, with week_start_day representing whether the week of the year
275
assumes the week starts on Sunday or Monday (6 or 0)."""
276
first_weekday = datetime_date(year, 1, 1).weekday()
277
# If we are dealing with the %U directive (week starts on Sunday), it's
278
# easier to just shift the view to Sunday being the first day of the
279
# week.
280
if not week_starts_Mon:
281
first_weekday = (first_weekday + 1) % 7
282
day_of_week = (day_of_week + 1) % 7
283
# Need to watch out for a week 0 (when the first day of the year is not
284
# the same as that specified by %U or %W).
285
week_0_length = (7 - first_weekday) % 7
286
if week_of_year == 0:
287
return 1 + day_of_week - first_weekday
288
else:
289
days_to_week = week_0_length + (7 * (week_of_year - 1))
290
return 1 + days_to_week + day_of_week
291
292
293
def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string according to format.

    Return a 3-tuple ``(time_tuple, fraction, gmtoff_fraction)``:

    time_tuple -- ``(year, month, day, hour, minute, second, weekday,
                  julian, tz, tzname, gmtoff)``; tz is -1 when unknown,
                  tzname is the text matched by %Z (or None) and gmtoff
                  is the %z offset in whole seconds (or None)
    fraction -- microseconds parsed from %f (0 when absent)
    gmtoff_fraction -- microsecond part of the %z offset (0 when absent)

    Raise TypeError if either argument is not a str.  Raise ValueError if
    the format contains a bad directive or a stray '%', if data_string
    does not match the format or has trailing data, or if the directives
    are combined in an ambiguous or inconsistent way.
    """

    for index, arg in enumerate([data_string, format]):
        if not isinstance(arg, str):
            msg = "strptime() argument {} must be str, not {}"
            raise TypeError(msg.format(index, type(arg)))

    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        locale_time = _TimeRE_cache.locale_time
        # Rebuild everything if the locale or timezone changed since the
        # cached TimeRE was created.
        if (_getlang() != locale_time.lang or
            time.tzname != locale_time.tzname or
            time.daylight != locale_time.daylight):
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
            locale_time = _TimeRE_cache.locale_time
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        format_regex = _regex_cache.get(format)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(format)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                del err
                raise ValueError("'%s' is a bad directive in format '%s'" %
                                    (bad_directive, format)) from None
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError("stray %% in format '%s'" % format) from None
            _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data %r does not match format %r" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])

    iso_year = year = None
    month = day = 1
    hour = minute = second = fraction = 0
    # Default to -1 to signify that the value is not known; not critical
    # to have, though
    tz = -1
    gmtoff = None
    gmtoff_fraction = 0
    iso_week = week_of_year = None
    week_of_year_start = None
    # weekday and julian defaulted to None so as to signal need to calculate
    # values
    weekday = julian = None
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'G':
            iso_year = int(found_dict['G'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'f':
            s = found_dict['f']
            # Pad to always return microseconds.
            s += "0" * (6 - len(s))
            fraction = int(s)
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'u':
            # %u counts from Monday == 1; convert to Monday == 0.
            weekday = int(found_dict['u'])
            weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif group_key == 'V':
            iso_week = int(found_dict['V'])
        elif group_key == 'z':
            z = found_dict['z']
            if z == 'Z':
                gmtoff = 0
            else:
                # Normalize "+HH:MM[:SS[.ffffff]]" to "+HHMM[SS[.ffffff]]".
                # Colons must be used either everywhere or nowhere.
                if z[3] == ':':
                    z = z[:3] + z[4:]
                    if len(z) > 5:
                        if z[5] != ':':
                            msg = f"Inconsistent use of : in {found_dict['z']}"
                            raise ValueError(msg)
                        z = z[:5] + z[6:]
                elif len(z) > 5 and z[5] == ':':
                    # "+HHMM:SS" is accepted by the regex but mixes both
                    # styles; report it instead of failing in int() below.
                    msg = f"Inconsistent use of : in {found_dict['z']}"
                    raise ValueError(msg)
                hours = int(z[1:3])
                minutes = int(z[3:5])
                seconds = int(z[5:7] or 0)
                gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds
                # z[7] is the '.' separating seconds from the fraction.
                gmtoff_remainder = z[8:]
                # Pad to always return microseconds.
                gmtoff_remainder_padding = "0" * (6 - len(gmtoff_remainder))
                gmtoff_fraction = int(gmtoff_remainder + gmtoff_remainder_padding)
                if z.startswith("-"):
                    gmtoff = -gmtoff
                    gmtoff_fraction = -gmtoff_fraction
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                       time.daylight and found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break

    # Deal with the cases where ambiguities arise
    # don't assume default values for ISO week/year
    if year is None and iso_year is not None:
        if iso_week is None or weekday is None:
            raise ValueError("ISO year directive '%G' must be used with "
                             "the ISO week directive '%V' and a weekday "
                             "directive ('%A', '%a', '%w', or '%u').")
        if julian is not None:
            raise ValueError("Day of the year directive '%j' is not "
                             "compatible with ISO year directive '%G'. "
                             "Use '%Y' instead.")
    elif week_of_year is None and iso_week is not None:
        if weekday is None:
            raise ValueError("ISO week directive '%V' must be used with "
                             "the ISO year directive '%G' and a weekday "
                             "directive ('%A', '%a', '%w', or '%u').")
        else:
            raise ValueError("ISO week directive '%V' is incompatible with "
                             "the year directive '%Y'. Use the ISO year '%G' "
                             "instead.")

    leap_year_fix = False
    if year is None and month == 2 and day == 29:
        year = 1904  # 1904 is first leap year of 20th century
        leap_year_fix = True
    elif year is None:
        year = 1900

    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year.
    if julian is None and weekday is not None:
        if week_of_year is not None:
            week_starts_Mon = week_of_year_start == 0
            julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                                week_starts_Mon)
        elif iso_year is not None and iso_week is not None:
            datetime_result = datetime_date.fromisocalendar(iso_year, iso_week, weekday + 1)
            year = datetime_result.year
            month = datetime_result.month
            day = datetime_result.day
        if julian is not None and julian <= 0:
            # The requested day lies in the previous year; express it as
            # a day of that year instead.
            year -= 1
            yday = 366 if calendar.isleap(year) else 365
            julian += yday

    if julian is None:
        # Cannot pre-calculate datetime_date() since can change in Julian
        # calculation and thus could have different value for the day of
        # the week calculation.
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day (or if it was
           # calculated above with year/week/weekday) it will be accurate.
        datetime_result = datetime_date.fromordinal(
                            (julian - 1) +
                            datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday is None:
        weekday = datetime_date(year, month, day).weekday()
    # Add timezone info
    tzname = found_dict.get("Z")

    if leap_year_fix:
        # the caller didn't supply a year but asked for Feb 29th. We couldn't
        # use the default of 1900 for computations. We set it back to ensure
        # that February 29th is smaller than March 1st.
        year = 1900

    return (year, month, day,
            hour, minute, second,
            weekday, julian, tz, tzname, gmtoff), fraction, gmtoff_fraction
545
546
def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string with format and return a time.struct_time."""
    parsed = _strptime(data_string, format)
    # Only the first element of the result (the field tuple) is needed,
    # truncated to the number of items struct_time accepts.
    fields = parsed[0]
    return time.struct_time(fields[:time._STRUCT_TM_ITEMS])
551
552
def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string with format and return an instance of cls.

    cls is called with year, month, day, hour, minute, second and
    microsecond, followed by a datetime.timezone when the input carried a
    UTC offset.
    """
    fields, microsecond, offset_microseconds = _strptime(data_string, format)
    # The last two parsed fields are the matched zone name and the offset.
    zone_name, utc_offset = fields[-2:]
    ctor_args = fields[:6] + (microsecond,)
    if utc_offset is None:
        return cls(*ctor_args)
    delta = datetime_timedelta(seconds=utc_offset,
                               microseconds=offset_microseconds)
    if zone_name:
        zone = datetime_timezone(delta, zone_name)
    else:
        zone = datetime_timezone(delta)
    return cls(*ctor_args, zone)
567
568