Path: blob/main/contrib/llvm-project/libcxx/src/experimental/tzdb.cpp
35230 views
//===----------------------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html910#include <algorithm>11#include <chrono>12#include <filesystem>13#include <fstream>14#include <stdexcept>15#include <string>1617#include "include/tzdb/time_zone_private.h"18#include "include/tzdb/types_private.h"19#include "include/tzdb/tzdb_list_private.h"20#include "include/tzdb/tzdb_private.h"2122// Contains a parser for the IANA time zone data files.23//24// These files can be found at https://data.iana.org/time-zones/ and are in the25// public domain. Information regarding the input can be found at26// https://data.iana.org/time-zones/tz-how-to.html and27// https://man7.org/linux/man-pages/man8/zic.8.html.28//29// As indicated at https://howardhinnant.github.io/date/tz.html#Installation30// For Windows another file seems to be required31// https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml32// This file seems to contain the mapping of Windows time zone name to IANA33// time zone names.34//35// However this article mentions another way to do the mapping on Windows36// https://devblogs.microsoft.com/oldnewthing/20210527-00/?p=10525537// This requires Windows 10 Version 1903, which was released in May of 201938// and considered end of life in December 202039// https://learn.microsoft.com/en-us/lifecycle/announcements/windows-10-1903-end-of-servicing40//41// TODO TZDB Implement the Windows mapping in tzdb::current_zone4243_LIBCPP_BEGIN_NAMESPACE_STD4445namespace chrono {4647// This function is weak so it can be overriden in the tests. The48// declaration is in the test header test/support/test_tzdb.h49_LIBCPP_WEAK string_view __libcpp_tzdb_directory() {50#if defined(__linux__)51return "/usr/share/zoneinfo/";52#else53# error "unknown path to the IANA Time Zone Database"54#endif55}5657//===----------------------------------------------------------------------===//58// Details59//===----------------------------------------------------------------------===//6061[[nodiscard]] static bool __is_whitespace(int __c) { return __c == ' ' || __c == '\t'; }6263static void __skip_optional_whitespace(istream& __input) {64while (chrono::__is_whitespace(__input.peek()))65__input.get();66}6768static void __skip_mandatory_whitespace(istream& __input) {69if (!chrono::__is_whitespace(__input.get()))70std::__throw_runtime_error("corrupt tzdb: expected whitespace");7172chrono::__skip_optional_whitespace(__input);73}7475[[nodiscard]] static bool __is_eol(int __c) { return __c == '\n' || __c == std::char_traits<char>::eof(); }7677static void __skip_line(istream& __input) {78while (!chrono::__is_eol(__input.peek())) {79__input.get();80}81__input.get();82}8384static void __skip(istream& __input, char __suffix) {85if (std::tolower(__input.peek()) == __suffix)86__input.get();87}8889static void __skip(istream& __input, string_view __suffix) {90for (auto __c : __suffix)91if (std::tolower(__input.peek()) == __c)92__input.get();93}9495static void __matches(istream& __input, char __expected) {96if (std::tolower(__input.get()) != __expected)97std::__throw_runtime_error((string("corrupt tzdb: expected character '") + __expected + '\'').c_str());98}99100static void __matches(istream& __input, string_view __expected) {101for (auto __c : __expected)102if (std::tolower(__input.get()) != __c)103std::__throw_runtime_error((string("corrupt tzdb: expected string '") + string(__expected) + '\'').c_str());104}105106[[nodiscard]] static string __parse_string(istream& __input) {107string __result;108while (true) {109int __c = __input.get();110switch (__c) {111case ' ':112case '\t':113case '\n':114__input.unget();115[[fallthrough]];116case istream::traits_type::eof():117if (__result.empty())118std::__throw_runtime_error("corrupt tzdb: expected a string");119120return __result;121122default:123__result.push_back(__c);124}125}126}127128[[nodiscard]] static int64_t __parse_integral(istream& __input, bool __leading_zero_allowed) {129int64_t __result = __input.get();130if (__leading_zero_allowed) {131if (__result < '0' || __result > '9')132std::__throw_runtime_error("corrupt tzdb: expected a digit");133} else {134if (__result < '1' || __result > '9')135std::__throw_runtime_error("corrupt tzdb: expected a non-zero digit");136}137__result -= '0';138while (true) {139if (__input.peek() < '0' || __input.peek() > '9')140return __result;141142// In order to avoid possible overflows we limit the accepted range.143// Most values parsed are expected to be very small:144// - 8784 hours in a year145// - 31 days in a month146// - year no real maximum, these values are expected to be less than147// the range of the year type.148//149// However the leapseconds use a seconds after epoch value. Using an150// int would run into an overflow in 2038. By using a 64-bit value151// the range is large enough for the bilions of years. Limiting that152// range slightly to make the code easier is not an issue.153if (__result > (std::numeric_limits<int64_t>::max() / 16))154std::__throw_runtime_error("corrupt tzdb: integral too large");155156__result *= 10;157__result += __input.get() - '0';158}159}160161//===----------------------------------------------------------------------===//162// Calendar163//===----------------------------------------------------------------------===//164165[[nodiscard]] static day __parse_day(istream& __input) {166unsigned __result = chrono::__parse_integral(__input, false);167if (__result > 31)168std::__throw_runtime_error("corrupt tzdb day: value too large");169return day{__result};170}171172[[nodiscard]] static weekday __parse_weekday(istream& __input) {173// TZDB allows the shortest unique name.174switch (std::tolower(__input.get())) {175case 'f':176chrono::__skip(__input, "riday");177return Friday;178179case 'm':180chrono::__skip(__input, "onday");181return Monday;182183case 's':184switch (std::tolower(__input.get())) {185case 'a':186chrono::__skip(__input, "turday");187return Saturday;188189case 'u':190chrono::__skip(__input, "nday");191return Sunday;192}193break;194195case 't':196switch (std::tolower(__input.get())) {197case 'h':198chrono::__skip(__input, "ursday");199return Thursday;200201case 'u':202chrono::__skip(__input, "esday");203return Tuesday;204}205break;206case 'w':207chrono::__skip(__input, "ednesday");208return Wednesday;209}210211std::__throw_runtime_error("corrupt tzdb weekday: invalid name");212}213214[[nodiscard]] static month __parse_month(istream& __input) {215// TZDB allows the shortest unique name.216switch (std::tolower(__input.get())) {217case 'a':218switch (std::tolower(__input.get())) {219case 'p':220chrono::__skip(__input, "ril");221return April;222223case 'u':224chrono::__skip(__input, "gust");225return August;226}227break;228229case 'd':230chrono::__skip(__input, "ecember");231return December;232233case 'f':234chrono::__skip(__input, "ebruary");235return February;236237case 'j':238switch (std::tolower(__input.get())) {239case 'a':240chrono::__skip(__input, "nuary");241return January;242243case 'u':244switch (std::tolower(__input.get())) {245case 'n':246chrono::__skip(__input, 'e');247return June;248249case 'l':250chrono::__skip(__input, 'y');251return July;252}253}254break;255256case 'm':257if (std::tolower(__input.get()) == 'a')258switch (std::tolower(__input.get())) {259case 'y':260return May;261262case 'r':263chrono::__skip(__input, "ch");264return March;265}266break;267268case 'n':269chrono::__skip(__input, "ovember");270return November;271272case 'o':273chrono::__skip(__input, "ctober");274return October;275276case 's':277chrono::__skip(__input, "eptember");278return September;279}280std::__throw_runtime_error("corrupt tzdb month: invalid name");281}282283[[nodiscard]] static year __parse_year_value(istream& __input) {284bool __negative = __input.peek() == '-';285if (__negative) [[unlikely]]286__input.get();287288int64_t __result = __parse_integral(__input, true);289if (__result > static_cast<int>(year::max())) {290if (__negative)291std::__throw_runtime_error("corrupt tzdb year: year is less than the minimum");292293std::__throw_runtime_error("corrupt tzdb year: year is greater than the maximum");294}295296return year{static_cast<int>(__negative ? -__result : __result)};297}298299[[nodiscard]] static year __parse_year(istream& __input) {300if (std::tolower(__input.peek()) != 'm') [[likely]]301return chrono::__parse_year_value(__input);302303__input.get();304switch (std::tolower(__input.peek())) {305case 'i':306__input.get();307chrono::__skip(__input, 'n');308[[fallthrough]];309310case ' ':311// The m is minimum, even when that is ambiguous.312return year::min();313314case 'a':315__input.get();316chrono::__skip(__input, 'x');317return year::max();318}319320std::__throw_runtime_error("corrupt tzdb year: expected 'min' or 'max'");321}322323//===----------------------------------------------------------------------===//324// TZDB fields325//===----------------------------------------------------------------------===//326327[[nodiscard]] static year __parse_to(istream& __input, year __only) {328if (std::tolower(__input.peek()) != 'o')329return chrono::__parse_year(__input);330331__input.get();332chrono::__skip(__input, "nly");333return __only;334}335336[[nodiscard]] static __tz::__constrained_weekday::__comparison_t __parse_comparison(istream& __input) {337switch (__input.get()) {338case '>':339chrono::__matches(__input, '=');340return __tz::__constrained_weekday::__ge;341342case '<':343chrono::__matches(__input, '=');344return __tz::__constrained_weekday::__le;345}346std::__throw_runtime_error("corrupt tzdb on: expected '>=' or '<='");347}348349[[nodiscard]] static __tz::__on __parse_on(istream& __input) {350if (std::isdigit(__input.peek()))351return chrono::__parse_day(__input);352353if (std::tolower(__input.peek()) == 'l') {354chrono::__matches(__input, "last");355return weekday_last(chrono::__parse_weekday(__input));356}357358return __tz::__constrained_weekday{359chrono::__parse_weekday(__input), chrono::__parse_comparison(__input), chrono::__parse_day(__input)};360}361362[[nodiscard]] static seconds __parse_duration(istream& __input) {363seconds __result{0};364int __c = __input.peek();365bool __negative = __c == '-';366if (__negative) {367__input.get();368// Negative is either a negative value or a single -.369// The latter means 0 and the parsing is complete.370if (!std::isdigit(__input.peek()))371return __result;372}373374__result += hours(__parse_integral(__input, true));375if (__input.peek() != ':')376return __negative ? -__result : __result;377378__input.get();379__result += minutes(__parse_integral(__input, true));380if (__input.peek() != ':')381return __negative ? -__result : __result;382383__input.get();384__result += seconds(__parse_integral(__input, true));385if (__input.peek() != '.')386return __negative ? -__result : __result;387388__input.get();389(void)__parse_integral(__input, true); // Truncate the digits.390391return __negative ? -__result : __result;392}393394[[nodiscard]] static __tz::__clock __parse_clock(istream& __input) {395switch (__input.get()) { // case sensitive396case 'w':397return __tz::__clock::__local;398case 's':399return __tz::__clock::__standard;400401case 'u':402case 'g':403case 'z':404return __tz::__clock::__universal;405}406407__input.unget();408return __tz::__clock::__local;409}410411[[nodiscard]] static bool __parse_dst(istream& __input, seconds __offset) {412switch (__input.get()) { // case sensitive413case 's':414return false;415416case 'd':417return true;418}419420__input.unget();421return __offset != 0s;422}423424[[nodiscard]] static __tz::__at __parse_at(istream& __input) {425return {__parse_duration(__input), __parse_clock(__input)};426}427428[[nodiscard]] static __tz::__save __parse_save(istream& __input) {429seconds __time = chrono::__parse_duration(__input);430return {__time, chrono::__parse_dst(__input, __time)};431}432433[[nodiscard]] static string __parse_letters(istream& __input) {434string __result = __parse_string(__input);435// Canonicalize "-" to "" since they are equivalent in the specification.436return __result != "-" ? __result : "";437}438439[[nodiscard]] static __tz::__continuation::__rules_t __parse_rules(istream& __input) {440int __c = __input.peek();441// A single - is not a SAVE but a special case.442if (__c == '-') {443__input.get();444if (chrono::__is_whitespace(__input.peek()))445return monostate{};446__input.unget();447return chrono::__parse_save(__input);448}449450if (std::isdigit(__c) || __c == '+')451return chrono::__parse_save(__input);452453return chrono::__parse_string(__input);454}455456[[nodiscard]] static __tz::__continuation __parse_continuation(__tz::__rules_storage_type& __rules, istream& __input) {457__tz::__continuation __result;458459__result.__rule_database_ = std::addressof(__rules);460461// Note STDOFF is specified as462// This field has the same format as the AT and SAVE fields of rule lines;463// These fields have different suffix letters, these letters seem464// not to be used so do not allow any of them.465466__result.__stdoff = chrono::__parse_duration(__input);467chrono::__skip_mandatory_whitespace(__input);468__result.__rules = chrono::__parse_rules(__input);469chrono::__skip_mandatory_whitespace(__input);470__result.__format = chrono::__parse_string(__input);471chrono::__skip_optional_whitespace(__input);472473if (chrono::__is_eol(__input.peek()))474return __result;475__result.__year = chrono::__parse_year(__input);476chrono::__skip_optional_whitespace(__input);477478if (chrono::__is_eol(__input.peek()))479return __result;480__result.__in = chrono::__parse_month(__input);481chrono::__skip_optional_whitespace(__input);482483if (chrono::__is_eol(__input.peek()))484return __result;485__result.__on = chrono::__parse_on(__input);486chrono::__skip_optional_whitespace(__input);487488if (chrono::__is_eol(__input.peek()))489return __result;490__result.__at = __parse_at(__input);491492return __result;493}494495//===----------------------------------------------------------------------===//496// Time Zone Database entries497//===----------------------------------------------------------------------===//498499static string __parse_version(istream& __input) {500// The first line in tzdata.zi contains501// # version YYYYw502// The parser expects this pattern503// #\s*version\s*\(.*)504// This part is not documented.505chrono::__matches(__input, '#');506chrono::__skip_optional_whitespace(__input);507chrono::__matches(__input, "version");508chrono::__skip_mandatory_whitespace(__input);509return chrono::__parse_string(__input);510}511512[[nodiscard]]513static __tz::__rule& __create_entry(__tz::__rules_storage_type& __rules, const string& __name) {514auto __result = [&]() -> __tz::__rule& {515auto& __rule = __rules.emplace_back(__name, vector<__tz::__rule>{});516return __rule.second.emplace_back();517};518519if (__rules.empty())520return __result();521522// Typically rules are in contiguous order in the database.523// But there are exceptions, some rules are interleaved.524if (__rules.back().first == __name)525return __rules.back().second.emplace_back();526527if (auto __it = ranges::find(__rules, __name, [](const auto& __r) { return __r.first; });528__it != ranges::end(__rules))529return __it->second.emplace_back();530531return __result();532}533534static void __parse_rule(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {535chrono::__skip_mandatory_whitespace(__input);536string __name = chrono::__parse_string(__input);537538__tz::__rule& __rule = __create_entry(__rules, __name);539540chrono::__skip_mandatory_whitespace(__input);541__rule.__from = chrono::__parse_year(__input);542chrono::__skip_mandatory_whitespace(__input);543__rule.__to = chrono::__parse_to(__input, __rule.__from);544chrono::__skip_mandatory_whitespace(__input);545chrono::__matches(__input, '-');546chrono::__skip_mandatory_whitespace(__input);547__rule.__in = chrono::__parse_month(__input);548chrono::__skip_mandatory_whitespace(__input);549__rule.__on = chrono::__parse_on(__input);550chrono::__skip_mandatory_whitespace(__input);551__rule.__at = __parse_at(__input);552chrono::__skip_mandatory_whitespace(__input);553__rule.__save = __parse_save(__input);554chrono::__skip_mandatory_whitespace(__input);555__rule.__letters = chrono::__parse_letters(__input);556chrono::__skip_line(__input);557}558559static void __parse_zone(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {560chrono::__skip_mandatory_whitespace(__input);561auto __p = std::make_unique<time_zone::__impl>(chrono::__parse_string(__input), __rules);562vector<__tz::__continuation>& __continuations = __p->__continuations();563chrono::__skip_mandatory_whitespace(__input);564565do {566// The first line must be valid, continuations are optional.567__continuations.emplace_back(__parse_continuation(__rules, __input));568chrono::__skip_line(__input);569chrono::__skip_optional_whitespace(__input);570} while (std::isdigit(__input.peek()) || __input.peek() == '-');571572__tzdb.zones.emplace_back(time_zone::__create(std::move(__p)));573}574575static void __parse_link(tzdb& __tzdb, istream& __input) {576chrono::__skip_mandatory_whitespace(__input);577string __target = chrono::__parse_string(__input);578chrono::__skip_mandatory_whitespace(__input);579string __name = chrono::__parse_string(__input);580chrono::__skip_line(__input);581582__tzdb.links.emplace_back(std::__private_constructor_tag{}, std::move(__name), std::move(__target));583}584585static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istream& __input) {586while (true) {587int __c = std::tolower(__input.get());588589switch (__c) {590case istream::traits_type::eof():591return;592593case ' ':594case '\t':595case '\n':596break;597598case '#':599chrono::__skip_line(__input);600break;601602case 'r':603chrono::__skip(__input, "ule");604chrono::__parse_rule(__db, __rules, __input);605break;606607case 'z':608chrono::__skip(__input, "one");609chrono::__parse_zone(__db, __rules, __input);610break;611612case 'l':613chrono::__skip(__input, "ink");614chrono::__parse_link(__db, __input);615break;616617default:618std::__throw_runtime_error("corrupt tzdb: unexpected input");619}620}621}622623static void __parse_leap_seconds(vector<leap_second>& __leap_seconds, istream&& __input) {624// The file stores dates since 1 January 1900, 00:00:00, we want625// seconds since 1 January 1970.626constexpr auto __offset = sys_days{1970y / January / 1} - sys_days{1900y / January / 1};627628struct __entry {629sys_seconds __timestamp;630seconds __value;631};632vector<__entry> __entries;633[&] {634while (true) {635switch (__input.peek()) {636case istream::traits_type::eof():637return;638639case ' ':640case '\t':641case '\n':642__input.get();643continue;644645case '#':646chrono::__skip_line(__input);647continue;648}649650sys_seconds __date = sys_seconds{seconds{chrono::__parse_integral(__input, false)}} - __offset;651chrono::__skip_mandatory_whitespace(__input);652seconds __value{chrono::__parse_integral(__input, false)};653chrono::__skip_line(__input);654655__entries.emplace_back(__date, __value);656}657}();658// The Standard requires the leap seconds to be sorted. The file659// leap-seconds.list usually provides them in sorted order, but that is not660// guaranteed so we ensure it here.661ranges::sort(__entries, {}, &__entry::__timestamp);662663// The database should contain the number of seconds inserted by a leap664// second (1 or -1). So the difference between the two elements is stored.665// std::ranges::views::adjacent has not been implemented yet.666(void)ranges::adjacent_find(__entries, [&](const __entry& __first, const __entry& __second) {667__leap_seconds.emplace_back(668std::__private_constructor_tag{}, __second.__timestamp, __second.__value - __first.__value);669return false;670});671}672673void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) {674filesystem::path __root = chrono::__libcpp_tzdb_directory();675ifstream __tzdata{__root / "tzdata.zi"};676677__tzdb.version = chrono::__parse_version(__tzdata);678chrono::__parse_tzdata(__tzdb, __rules, __tzdata);679ranges::sort(__tzdb.zones);680ranges::sort(__tzdb.links);681ranges::sort(__rules, {}, [](const auto& p) { return p.first; });682683// There are two files with the leap second information684// - leapseconds as specified by zic685// - leap-seconds.list the source data686// The latter is much easier to parse, it seems Howard shares that687// opinion.688chrono::__parse_leap_seconds(__tzdb.leap_seconds, ifstream{__root / "leap-seconds.list"});689}690691#ifdef _WIN32692[[nodiscard]] static const time_zone* __current_zone_windows(const tzdb& tzdb) {693// TODO TZDB Implement this on Windows.694std::__throw_runtime_error("unknown time zone");695}696#else // ifdef _WIN32697[[nodiscard]] static const time_zone* __current_zone_posix(const tzdb& tzdb) {698// On POSIX systems there are several ways to configure the time zone.699// In order of priority they are:700// - TZ environment variable701// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08702// The documentation is unclear whether or not it's allowed to703// change time zone information. For example the TZ string704// MST7MDT705// this is an entry in tzdata.zi. The value706// MST707// is also an entry. Is it allowed to use the following?708// MST-3709// Even when this is valid there is no time_zone record in the710// database. Since the library would need to return a valid pointer,711// this means the library needs to allocate and leak a pointer.712//713// - The time zone name is the target of the symlink /etc/localtime714// relative to /usr/share/zoneinfo/715716// The algorithm is like this:717// - If the environment variable TZ is set and points to a valid718// record use this value.719// - Else use the name based on the `/etc/localtime` symlink.720721if (const char* __tz = getenv("TZ"))722if (const time_zone* __result = tzdb.__locate_zone(__tz))723return __result;724725filesystem::path __path = "/etc/localtime";726if (!filesystem::exists(__path))727std::__throw_runtime_error("tzdb: the symlink '/etc/localtime' does not exist");728729if (!filesystem::is_symlink(__path))730std::__throw_runtime_error("tzdb: the path '/etc/localtime' is not a symlink");731732filesystem::path __tz = filesystem::read_symlink(__path);733// The path may be a relative path, in that case convert it to an absolute734// path based on the proper initial directory.735if (__tz.is_relative())736__tz = filesystem::canonical("/etc" / __tz);737738string __name = filesystem::relative(__tz, "/usr/share/zoneinfo/");739if (const time_zone* __result = tzdb.__locate_zone(__name))740return __result;741742std::__throw_runtime_error(("tzdb: the time zone '" + __name + "' is not found in the database").c_str());743}744#endif // ifdef _WIN32745746//===----------------------------------------------------------------------===//747// Public API748//===----------------------------------------------------------------------===//749750_LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_list& get_tzdb_list() {751static tzdb_list __result{new tzdb_list::__impl()};752return __result;753}754755[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const time_zone* tzdb::__current_zone() const {756#ifdef _WIN32757return chrono::__current_zone_windows(*this);758#else759return chrono::__current_zone_posix(*this);760#endif761}762763_LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb() {764if (chrono::remote_version() == chrono::get_tzdb().version)765return chrono::get_tzdb();766767return chrono::get_tzdb_list().__implementation().__load();768}769770_LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string remote_version() {771filesystem::path __root = chrono::__libcpp_tzdb_directory();772ifstream __tzdata{__root / "tzdata.zi"};773return chrono::__parse_version(__tzdata);774}775776} // namespace chrono777778_LIBCPP_END_NAMESPACE_STD779780781