Path: blob/main/contrib/llvm-project/libcxx/src/filesystem/path_parser.h
35231 views
//===----------------------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef PATH_PARSER_H9#define PATH_PARSER_H1011#include <__config>12#include <__utility/unreachable.h>13#include <cstddef>14#include <filesystem>15#include <utility>1617#include "format_string.h"1819_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM2021inline bool isSeparator(path::value_type C) {22if (C == '/')23return true;24#if defined(_LIBCPP_WIN32API)25if (C == '\\')26return true;27#endif28return false;29}3031inline bool isDriveLetter(path::value_type C) { return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z'); }3233namespace parser {3435using string_view_t = path::__string_view;36using string_view_pair = pair<string_view_t, string_view_t>;37using PosPtr = path::value_type const*;3839struct PathParser {40enum ParserState : unsigned char {41// Zero is a special sentinel value used by default constructed iterators.42PS_BeforeBegin = path::iterator::_BeforeBegin,43PS_InRootName = path::iterator::_InRootName,44PS_InRootDir = path::iterator::_InRootDir,45PS_InFilenames = path::iterator::_InFilenames,46PS_InTrailingSep = path::iterator::_InTrailingSep,47PS_AtEnd = path::iterator::_AtEnd48};4950const string_view_t Path;51string_view_t RawEntry;52ParserState State_;5354private:55PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}5657public:58PathParser(string_view_t P, string_view_t E, unsigned char S)59: Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {60// S cannot be '0' or PS_BeforeBegin.61}6263static PathParser CreateBegin(string_view_t P) noexcept {64PathParser PP(P, PS_BeforeBegin);65PP.increment();66return PP;67}6869static PathParser CreateEnd(string_view_t P) noexcept {70PathParser PP(P, PS_AtEnd);71return PP;72}7374PosPtr peek() const noexcept {75auto TkEnd = getNextTokenStartPos();76auto End = getAfterBack();77return TkEnd == End ? nullptr : TkEnd;78}7980void increment() noexcept {81const PosPtr End = getAfterBack();82const PosPtr Start = getNextTokenStartPos();83if (Start == End)84return makeState(PS_AtEnd);8586switch (State_) {87case PS_BeforeBegin: {88PosPtr TkEnd = consumeRootName(Start, End);89if (TkEnd)90return makeState(PS_InRootName, Start, TkEnd);91}92_LIBCPP_FALLTHROUGH();93case PS_InRootName: {94PosPtr TkEnd = consumeAllSeparators(Start, End);95if (TkEnd)96return makeState(PS_InRootDir, Start, TkEnd);97else98return makeState(PS_InFilenames, Start, consumeName(Start, End));99}100case PS_InRootDir:101return makeState(PS_InFilenames, Start, consumeName(Start, End));102103case PS_InFilenames: {104PosPtr SepEnd = consumeAllSeparators(Start, End);105if (SepEnd != End) {106PosPtr TkEnd = consumeName(SepEnd, End);107if (TkEnd)108return makeState(PS_InFilenames, SepEnd, TkEnd);109}110return makeState(PS_InTrailingSep, Start, SepEnd);111}112113case PS_InTrailingSep:114return makeState(PS_AtEnd);115116case PS_AtEnd:117__libcpp_unreachable();118}119}120121void decrement() noexcept {122const PosPtr REnd = getBeforeFront();123const PosPtr RStart = getCurrentTokenStartPos() - 1;124if (RStart == REnd) // we're decrementing the begin125return makeState(PS_BeforeBegin);126127switch (State_) {128case PS_AtEnd: {129// Try to consume a trailing separator or root directory first.130if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {131if (SepEnd == REnd)132return makeState(PS_InRootDir, Path.data(), RStart + 1);133PosPtr TkStart = consumeRootName(SepEnd, REnd);134if (TkStart == REnd)135return makeState(PS_InRootDir, RStart, RStart + 1);136return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);137} else {138PosPtr TkStart = consumeRootName(RStart, REnd);139if (TkStart == REnd)140return makeState(PS_InRootName, TkStart + 1, RStart + 1);141TkStart = consumeName(RStart, REnd);142return makeState(PS_InFilenames, TkStart + 1, RStart + 1);143}144}145case PS_InTrailingSep:146return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1);147case PS_InFilenames: {148PosPtr SepEnd = consumeAllSeparators(RStart, REnd);149if (SepEnd == REnd)150return makeState(PS_InRootDir, Path.data(), RStart + 1);151PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);152if (TkStart == REnd) {153if (SepEnd)154return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);155return makeState(PS_InRootName, TkStart + 1, RStart + 1);156}157TkStart = consumeName(SepEnd, REnd);158return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);159}160case PS_InRootDir:161return makeState(PS_InRootName, Path.data(), RStart + 1);162case PS_InRootName:163case PS_BeforeBegin:164__libcpp_unreachable();165}166}167168/// \brief Return a view with the "preferred representation" of the current169/// element. For example trailing separators are represented as a '.'170string_view_t operator*() const noexcept {171switch (State_) {172case PS_BeforeBegin:173case PS_AtEnd:174return PATHSTR("");175case PS_InRootDir:176if (RawEntry[0] == '\\')177return PATHSTR("\\");178else179return PATHSTR("/");180case PS_InTrailingSep:181return PATHSTR("");182case PS_InRootName:183case PS_InFilenames:184return RawEntry;185}186__libcpp_unreachable();187}188189explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }190191PathParser& operator++() noexcept {192increment();193return *this;194}195196PathParser& operator--() noexcept {197decrement();198return *this;199}200201bool atEnd() const noexcept { return State_ == PS_AtEnd; }202203bool inRootDir() const noexcept { return State_ == PS_InRootDir; }204205bool inRootName() const noexcept { return State_ == PS_InRootName; }206207bool inRootPath() const noexcept { return inRootName() || inRootDir(); }208209private:210void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {211State_ = NewState;212RawEntry = string_view_t(Start, End - Start);213}214void makeState(ParserState NewState) noexcept {215State_ = NewState;216RawEntry = {};217}218219PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }220221PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }222223/// \brief Return a pointer to the first character after the currently224/// lexed element.225PosPtr getNextTokenStartPos() const noexcept {226switch (State_) {227case PS_BeforeBegin:228return Path.data();229case PS_InRootName:230case PS_InRootDir:231case PS_InFilenames:232return &RawEntry.back() + 1;233case PS_InTrailingSep:234case PS_AtEnd:235return getAfterBack();236}237__libcpp_unreachable();238}239240/// \brief Return a pointer to the first character in the currently lexed241/// element.242PosPtr getCurrentTokenStartPos() const noexcept {243switch (State_) {244case PS_BeforeBegin:245case PS_InRootName:246return &Path.front();247case PS_InRootDir:248case PS_InFilenames:249case PS_InTrailingSep:250return &RawEntry.front();251case PS_AtEnd:252return &Path.back() + 1;253}254__libcpp_unreachable();255}256257// Consume all consecutive separators.258PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {259if (P == nullptr || P == End || !isSeparator(*P))260return nullptr;261const int Inc = P < End ? 1 : -1;262P += Inc;263while (P != End && isSeparator(*P))264P += Inc;265return P;266}267268// Consume exactly N separators, or return nullptr.269PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {270PosPtr Ret = consumeAllSeparators(P, End);271if (Ret == nullptr)272return nullptr;273if (P < End) {274if (Ret == P + N)275return Ret;276} else {277if (Ret == P - N)278return Ret;279}280return nullptr;281}282283PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {284PosPtr Start = P;285if (P == nullptr || P == End || isSeparator(*P))286return nullptr;287const int Inc = P < End ? 1 : -1;288P += Inc;289while (P != End && !isSeparator(*P))290P += Inc;291if (P == End && Inc < 0) {292// Iterating backwards and consumed all the rest of the input.293// Check if the start of the string would have been considered294// a root name.295PosPtr RootEnd = consumeRootName(End + 1, Start);296if (RootEnd)297return RootEnd - 1;298}299return P;300}301302PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {303if (P == End)304return nullptr;305if (P < End) {306if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')307return nullptr;308return P + 2;309} else {310if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')311return nullptr;312return P - 2;313}314}315316PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {317if (P == End)318return nullptr;319if (P < End)320return consumeName(consumeNSeparators(P, End, 2), End);321else322return consumeNSeparators(consumeName(P, End), End, 2);323}324325PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {326#if defined(_LIBCPP_WIN32API)327if (PosPtr Ret = consumeDriveLetter(P, End))328return Ret;329if (PosPtr Ret = consumeNetworkRoot(P, End))330return Ret;331#endif332return nullptr;333}334};335336inline string_view_pair separate_filename(string_view_t const& s) {337if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())338return string_view_pair{s, PATHSTR("")};339auto pos = s.find_last_of('.');340if (pos == string_view_t::npos || pos == 0)341return string_view_pair{s, string_view_t{}};342return string_view_pair{s.substr(0, pos), s.substr(pos)};343}344345inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; }346347} // namespace parser348349_LIBCPP_END_NAMESPACE_FILESYSTEM350351#endif // PATH_PARSER_H352353354