Path: blob/master/dep/rapidyaml/include/c4/substr.hpp
4261 views
#ifndef _C4_SUBSTR_HPP_1#define _C4_SUBSTR_HPP_23/** @file substr.hpp read+write string views */45#include <string.h>6#include <ctype.h>7#include <type_traits>89#include "c4/config.hpp"10#include "c4/error.hpp"11#include "c4/substr_fwd.hpp"1213#ifdef __clang__14# pragma clang diagnostic push15# pragma clang diagnostic ignored "-Wold-style-cast"16#elif defined(__GNUC__)17# pragma GCC diagnostic push18# pragma GCC diagnostic ignored "-Wtype-limits" // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter.19# pragma GCC diagnostic ignored "-Wuseless-cast"20# pragma GCC diagnostic ignored "-Wold-style-cast"21#endif222324namespace c4 {252627//-----------------------------------------------------------------------------28//-----------------------------------------------------------------------------29//-----------------------------------------------------------------------------3031namespace detail {3233template<typename C>34static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last)35{36while(last > first)37{38C tmp = *last;39*last-- = *first;40*first++ = tmp;41}42}4344} // namespace detail454647//-----------------------------------------------------------------------------48//-----------------------------------------------------------------------------49//-----------------------------------------------------------------------------5051// utility macros to deuglify SFINAE code; undefined after the class.52// https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types53#define C4_REQUIRE_RW(ret_type) \54template <typename U=C> \55typename std::enable_if< ! std::is_const<U>::value, ret_type>::type565758/** a non-owning string-view, consisting of a character pointer59* and a length.60*61* @note The pointer is explicitly restricted.62*63* @see to_substr()64* @see to_csubstr()65*/66template<class C>67struct C4CORE_EXPORT basic_substring68{69public:7071/** a restricted pointer to the first character of the substring */72C * C4_RESTRICT str;73/** the length of the substring */74size_t len;7576public:7778/** @name Types */79/** @{ */8081using CC = typename std::add_const<C>::type; //!< CC=const char82using NCC_ = typename std::remove_const<C>::type; //!< NCC_=non const char8384using ro_substr = basic_substring<CC>;85using rw_substr = basic_substring<NCC_>;8687using char_type = C;88using size_type = size_t;8990using iterator = C*;91using const_iterator = CC*;9293enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 };9495/// convert automatically to substring of const C96template<class U=C>97C4_ALWAYS_INLINE operator typename std::enable_if<!std::is_const<U>::value, ro_substr const&>::type () const noexcept98{99return *(ro_substr const*)this; // don't call the str+len ctor because it does a check100}101102/** @} */103104public:105106/** @name Default construction and assignment */107/** @{ */108109C4_ALWAYS_INLINE constexpr basic_substring() noexcept : str(), len() {}110111C4_ALWAYS_INLINE basic_substring(basic_substring const&) noexcept = default;112C4_ALWAYS_INLINE basic_substring(basic_substring &&) noexcept = default;113C4_ALWAYS_INLINE basic_substring(std::nullptr_t) noexcept : str(nullptr), len(0) {}114115C4_ALWAYS_INLINE basic_substring& operator= (basic_substring const&) noexcept = default;116C4_ALWAYS_INLINE basic_substring& operator= (basic_substring &&) noexcept = default;117C4_ALWAYS_INLINE basic_substring& operator= (std::nullptr_t) noexcept { str = nullptr; len = 0; return *this; }118119C4_ALWAYS_INLINE void clear() noexcept { str = nullptr; len = 0; }120121/** @} */122123public:124125/** @name Construction and assignment from characters with the same type */126/** @{ */127128/** Construct from an array.129* @warning the input string need not be zero terminated, but the130* length is taken as if the string was zero terminated */131template<size_t N>132C4_ALWAYS_INLINE constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {}133/** Construct from a pointer and length.134* @warning the input string need not be zero terminated. */135C4_ALWAYS_INLINE basic_substring(C *s_, size_t len_) noexcept : str(s_), len(len_) { C4_ASSERT(str || !len_); }136/** Construct from two pointers.137* @warning the end pointer MUST BE larger than or equal to the begin pointer138* @warning the input string need not be zero terminated */139C4_ALWAYS_INLINE basic_substring(C *beg_, C *end_) noexcept : str(beg_), len(static_cast<size_t>(end_ - beg_)) { C4_ASSERT(end_ >= beg_); }140/** Construct from a C-string (zero-terminated string)141* @warning the input string MUST BE zero terminated.142* @warning will call strlen()143* @note this overload uses SFINAE to prevent it from overriding the array ctor144* @see For a more detailed explanation on why the plain overloads cannot145* coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */146template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>147C4_ALWAYS_INLINE basic_substring(U s_) noexcept : str(s_), len(s_ ? strlen(s_) : 0) {}148149/** Assign from an array.150* @warning the input string need not be zero terminated, but the151* length is taken as if the string was zero terminated */152template<size_t N>153C4_ALWAYS_INLINE void assign(C (&s_)[N]) noexcept { str = (s_); len = (N-1); }154/** Assign from a pointer and length.155* @warning the input string need not be zero terminated. */156C4_ALWAYS_INLINE void assign(C *s_, size_t len_) noexcept { str = s_; len = len_; C4_ASSERT(str || !len_); }157/** Assign from two pointers.158* @warning the end pointer MUST BE larger than or equal to the begin pointer159* @warning the input string need not be zero terminated. */160C4_ALWAYS_INLINE void assign(C *beg_, C *end_) noexcept { C4_ASSERT(end_ >= beg_); str = (beg_); len = static_cast<size_t>(end_ - beg_); }161/** Assign from a C-string (zero-terminated string)162* @warning the input string must be zero terminated.163* @warning will call strlen()164* @note this overload uses SFINAE to prevent it from overriding the array ctor165* @see For a more detailed explanation on why the plain overloads cannot166* coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */167template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>168C4_ALWAYS_INLINE void assign(U s_) noexcept { str = (s_); len = (s_ ? strlen(s_) : 0); }169170/** Assign from an array.171* @warning the input string need not be zero terminated. */172template<size_t N>173C4_ALWAYS_INLINE basic_substring& operator= (C (&s_)[N]) noexcept { str = (s_); len = (N-1); return *this; }174/** Assign from a C-string (zero-terminated string)175* @warning the input string MUST BE zero terminated.176* @warning will call strlen()177* @note this overload uses SFINAE to prevent it from overriding the array ctor178* @see For a more detailed explanation on why the plain overloads cannot179* coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */180template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>181C4_ALWAYS_INLINE basic_substring& operator= (U s_) noexcept { str = s_; len = s_ ? strlen(s_) : 0; return *this; }182183/** @} */184185public:186187/** @name Standard accessor methods */188/** @{ */189190C4_ALWAYS_INLINE C4_PURE bool has_str() const noexcept { return ! empty() && str[0] != C(0); }191C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { return (len == 0 || str == nullptr); }192C4_ALWAYS_INLINE C4_PURE bool not_empty() const noexcept { return (len != 0 && str != nullptr); }193C4_ALWAYS_INLINE C4_PURE size_t size() const noexcept { return len; }194195C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; }196C4_ALWAYS_INLINE C4_PURE iterator end () noexcept { return str + len; }197198C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; }199C4_ALWAYS_INLINE C4_PURE const_iterator end () const noexcept { return str + len; }200201C4_ALWAYS_INLINE C4_PURE C * data() noexcept { return str; }202C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; }203204C4_ALWAYS_INLINE C4_PURE C & operator[] (size_t i) noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }205C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }206207C4_ALWAYS_INLINE C4_PURE C & front() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }208C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }209210C4_ALWAYS_INLINE C4_PURE C & back() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }211C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }212213/** @} */214215public:216217/** @name Comparison methods */218/** @{ */219220C4_PURE int compare(C const c) const noexcept221{222C4_XASSERT((str != nullptr) || len == 0);223if(C4_LIKELY(str != nullptr && len > 0))224return (*str != c) ? *str - c : (static_cast<int>(len) - 1);225else226return -1;227}228229C4_PURE int compare(const char *C4_RESTRICT that, size_t sz) const noexcept230{231C4_XASSERT(that || sz == 0);232C4_XASSERT(str || len == 0);233if(C4_LIKELY(str && that))234{235{236const size_t min = len < sz ? len : sz;237for(size_t i = 0; i < min; ++i)238if(str[i] != that[i])239return str[i] < that[i] ? -1 : 1;240}241if(len < sz)242return -1;243else if(len == sz)244return 0;245else246return 1;247}248else if(len == sz)249{250C4_XASSERT(len == 0 && sz == 0);251return 0;252}253return len < sz ? -1 : 1;254}255256C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); }257258C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; }259C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; }260261C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; }262C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; }263C4_ALWAYS_INLINE C4_PURE bool operator< (C const c) const noexcept { return this->compare(c) < 0; }264C4_ALWAYS_INLINE C4_PURE bool operator> (C const c) const noexcept { return this->compare(c) > 0; }265C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; }266C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; }267268template<class U> C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring<U> const that) const noexcept { return this->compare(that) == 0; }269template<class U> C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring<U> const that) const noexcept { return this->compare(that) != 0; }270template<class U> C4_ALWAYS_INLINE C4_PURE bool operator< (basic_substring<U> const that) const noexcept { return this->compare(that) < 0; }271template<class U> C4_ALWAYS_INLINE C4_PURE bool operator> (basic_substring<U> const that) const noexcept { return this->compare(that) > 0; }272template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring<U> const that) const noexcept { return this->compare(that) <= 0; }273template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring<U> const that) const noexcept { return this->compare(that) >= 0; }274275template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; }276template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; }277template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator< (const char (&that)[N]) const noexcept { return this->compare(that, N-1) < 0; }278template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator> (const char (&that)[N]) const noexcept { return this->compare(that, N-1) > 0; }279template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; }280template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; }281282/** @} */283284public:285286/** @name Sub-selection methods */287/** @{ */288289/** true if *this is a substring of that (ie, from the same buffer) */290C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept291{292return that.is_super(*this);293}294295/** true if that is a substring of *this (ie, from the same buffer) */296C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept297{298if(C4_LIKELY(len > 0))299return that.str >= str && that.str+that.len <= str+len;300else301return that.len == 0 && that.str == str && str != nullptr;302}303304/** true if there is overlap of at least one element between that and *this */305C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept306{307// thanks @timwynants308return that.str+that.len > str && that.str < str+len;309}310311public:312313/** return [first,len[ */314C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept315{316C4_ASSERT(first >= 0 && first <= len);317return basic_substring(str + first, len - first);318}319320/** return [first,first+num[. If num==npos, return [first,len[ */321C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept322{323C4_ASSERT(first >= 0 && first <= len);324C4_ASSERT((num >= 0 && num <= len) || (num == npos));325size_t rnum = num != npos ? num : len - first;326C4_ASSERT((first >= 0 && first + rnum <= len) || (num == 0));327return basic_substring(str + first, rnum);328}329330/** return [first,last[. If last==npos, return [first,len[ */331C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept332{333C4_ASSERT(first >= 0 && first <= len);334last = last != npos ? last : len;335C4_ASSERT(first <= last);336C4_ASSERT(last >= 0 && last <= len);337return basic_substring(str + first, last - first);338}339340/** return the first @p num elements: [0,num[*/341C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept342{343C4_ASSERT(num <= len || num == npos);344return basic_substring(str, num != npos ? num : len);345}346347/** return the last @num elements: [len-num,len[*/348C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept349{350C4_ASSERT(num <= len || num == npos);351return num != npos ?352basic_substring(str + len - num, num) :353*this;354}355356/** offset from the ends: return [left,len-right[ ; ie, trim a357number of characters from the left and right. This is358equivalent to python's negative list indices. */359C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept360{361C4_ASSERT(left >= 0 && left <= len);362C4_ASSERT(right >= 0 && right <= len);363C4_ASSERT(left <= len - right + 1);364return basic_substring(str + left, len - right - left);365}366367/** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */368C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept369{370C4_ASSERT(pos <= len || pos == npos);371return (pos != npos) ?372basic_substring(str, pos) :373*this;374}375376/** return [0, pos+include_pos[ . Same as .first(pos+1), but provided for compatibility with .right_of() */377C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept378{379C4_ASSERT(pos <= len || pos == npos);380return (pos != npos) ?381basic_substring(str, pos+include_pos) :382*this;383}384385/** return [pos+1, len[ */386C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept387{388C4_ASSERT(pos <= len || pos == npos);389return (pos != npos) ?390basic_substring(str + (pos + 1), len - (pos + 1)) :391basic_substring(str + len, size_t(0));392}393394/** return [pos+!include_pos, len[ */395C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept396{397C4_ASSERT(pos <= len || pos == npos);398return (pos != npos) ?399basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) :400basic_substring(str + len, size_t(0));401}402403public:404405/** given @p subs a substring of the current string, get the406* portion of the current string to the left of it */407C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept408{409C4_ASSERT(is_super(subs) || subs.empty());410auto ssb = subs.begin();411auto b = begin();412auto e = end();413if(ssb >= b && ssb <= e)414return sub(0, static_cast<size_t>(ssb - b));415else416return sub(0, 0);417}418419/** given @p subs a substring of the current string, get the420* portion of the current string to the right of it */421C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept422{423C4_ASSERT(is_super(subs) || subs.empty());424auto sse = subs.end();425auto b = begin();426auto e = end();427if(sse >= b && sse <= e)428return sub(static_cast<size_t>(sse - b), static_cast<size_t>(e - sse));429else430return sub(0, 0);431}432433/** @} */434435public:436437/** @name Removing characters (trim()) / patterns (strip()) from the tips of the string */438/** @{ */439440/** trim left */441basic_substring triml(const C c) const442{443if( ! empty())444{445size_t pos = first_not_of(c);446if(pos != npos)447return sub(pos);448}449return sub(0, 0);450}451/** trim left ANY of the characters.452* @see stripl() to remove a pattern from the left */453basic_substring triml(ro_substr chars) const454{455if( ! empty())456{457size_t pos = first_not_of(chars);458if(pos != npos)459return sub(pos);460}461return sub(0, 0);462}463464/** trim the character c from the right */465basic_substring trimr(const C c) const466{467if( ! empty())468{469size_t pos = last_not_of(c, npos);470if(pos != npos)471return sub(0, pos+1);472}473return sub(0, 0);474}475/** trim right ANY of the characters476* @see stripr() to remove a pattern from the right */477basic_substring trimr(ro_substr chars) const478{479if( ! empty())480{481size_t pos = last_not_of(chars, npos);482if(pos != npos)483return sub(0, pos+1);484}485return sub(0, 0);486}487488/** trim the character c left and right */489basic_substring trim(const C c) const490{491return triml(c).trimr(c);492}493/** trim left and right ANY of the characters494* @see strip() to remove a pattern from the left and right */495basic_substring trim(ro_substr const chars) const496{497return triml(chars).trimr(chars);498}499500/** remove a pattern from the left501* @see triml() to remove characters*/502basic_substring stripl(ro_substr pattern) const503{504if( ! begins_with(pattern))505return *this;506return sub(pattern.len < len ? pattern.len : len);507}508509/** remove a pattern from the right510* @see trimr() to remove characters*/511basic_substring stripr(ro_substr pattern) const512{513if( ! ends_with(pattern))514return *this;515return left_of(len - (pattern.len < len ? pattern.len : len));516}517518/** @} */519520public:521522/** @name Lookup methods */523/** @{ */524525inline size_t find(const C c, size_t start_pos=0) const526{527return first_of(c, start_pos);528}529inline size_t find(ro_substr pattern, size_t start_pos=0) const530{531C4_ASSERT(start_pos == npos || (start_pos >= 0 && start_pos <= len));532if(len < pattern.len) return npos;533for(size_t i = start_pos, e = len - pattern.len + 1; i < e; ++i)534{535bool gotit = true;536for(size_t j = 0; j < pattern.len; ++j)537{538C4_ASSERT(i + j < len);539if(str[i + j] != pattern.str[j])540{541gotit = false;542break;543}544}545if(gotit)546{547return i;548}549}550return npos;551}552553public:554555/** count the number of occurrences of c */556inline size_t count(const C c, size_t pos=0) const557{558C4_ASSERT(pos >= 0 && pos <= len);559size_t num = 0;560pos = find(c, pos);561while(pos != npos)562{563++num;564pos = find(c, pos + 1);565}566return num;567}568569/** count the number of occurrences of s */570inline size_t count(ro_substr c, size_t pos=0) const571{572C4_ASSERT(pos >= 0 && pos <= len);573size_t num = 0;574pos = find(c, pos);575while(pos != npos)576{577++num;578pos = find(c, pos + c.len);579}580return num;581}582583/** get the substr consisting of the first occurrence of @p c after @p pos, or an empty substr if none occurs */584inline basic_substring select(const C c, size_t pos=0) const585{586pos = find(c, pos);587return pos != npos ? sub(pos, 1) : basic_substring();588}589590/** get the substr consisting of the first occurrence of @p pattern after @p pos, or an empty substr if none occurs */591inline basic_substring select(ro_substr pattern, size_t pos=0) const592{593pos = find(pattern, pos);594return pos != npos ? sub(pos, pattern.len) : basic_substring();595}596597public:598599struct first_of_any_result600{601size_t which;602size_t pos;603inline operator bool() const { return which != NONE && pos != npos; }604};605606first_of_any_result first_of_any(ro_substr s0, ro_substr s1) const607{608ro_substr s[2] = {s0, s1};609return first_of_any_iter(&s[0], &s[0] + 2);610}611612first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2) const613{614ro_substr s[3] = {s0, s1, s2};615return first_of_any_iter(&s[0], &s[0] + 3);616}617618first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3) const619{620ro_substr s[4] = {s0, s1, s2, s3};621return first_of_any_iter(&s[0], &s[0] + 4);622}623624first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3, ro_substr s4) const625{626ro_substr s[5] = {s0, s1, s2, s3, s4};627return first_of_any_iter(&s[0], &s[0] + 5);628}629630template<class It>631first_of_any_result first_of_any_iter(It first_span, It last_span) const632{633for(size_t i = 0; i < len; ++i)634{635size_t curr = 0;636for(It it = first_span; it != last_span; ++curr, ++it)637{638auto const& chars = *it;639if((i + chars.len) > len) continue;640bool gotit = true;641for(size_t j = 0; j < chars.len; ++j)642{643C4_ASSERT(i + j < len);644if(str[i + j] != chars[j])645{646gotit = false;647break;648}649}650if(gotit)651{652return {curr, i};653}654}655}656return {NONE, npos};657}658659public:660661/** true if the first character of the string is @p c */662bool begins_with(const C c) const663{664return len > 0 ? str[0] == c : false;665}666667/** true if the first @p num characters of the string are @p c */668bool begins_with(const C c, size_t num) const669{670if(len < num)671{672return false;673}674for(size_t i = 0; i < num; ++i)675{676if(str[i] != c)677{678return false;679}680}681return true;682}683684/** true if the string begins with the given @p pattern */685bool begins_with(ro_substr pattern) const686{687if(len < pattern.len)688{689return false;690}691for(size_t i = 0; i < pattern.len; ++i)692{693if(str[i] != pattern[i])694{695return false;696}697}698return true;699}700701/** true if the first character of the string is any of the given @p chars */702bool begins_with_any(ro_substr chars) const703{704if(len == 0)705{706return false;707}708for(size_t i = 0; i < chars.len; ++i)709{710if(str[0] == chars.str[i])711{712return true;713}714}715return false;716}717718/** true if the last character of the string is @p c */719bool ends_with(const C c) const720{721return len > 0 ? str[len-1] == c : false;722}723724/** true if the last @p num characters of the string are @p c */725bool ends_with(const C c, size_t num) const726{727if(len < num)728{729return false;730}731for(size_t i = len - num; i < len; ++i)732{733if(str[i] != c)734{735return false;736}737}738return true;739}740741/** true if the string ends with the given @p pattern */742bool ends_with(ro_substr pattern) const743{744if(len < pattern.len)745{746return false;747}748for(size_t i = 0, s = len-pattern.len; i < pattern.len; ++i)749{750if(str[s+i] != pattern[i])751{752return false;753}754}755return true;756}757758/** true if the last character of the string is any of the given @p chars */759bool ends_with_any(ro_substr chars) const760{761if(len == 0)762{763return false;764}765for(size_t i = 0; i < chars.len; ++i)766{767if(str[len - 1] == chars[i])768{769return true;770}771}772return false;773}774775public:776777/** @return the first position where c is found in the string, or npos if none is found */778size_t first_of(const C c, size_t start=0) const779{780C4_ASSERT(start == npos || (start >= 0 && start <= len));781for(size_t i = start; i < len; ++i)782{783if(str[i] == c)784return i;785}786return npos;787}788789/** @return the last position where c is found in the string, or npos if none is found */790size_t last_of(const C c, size_t start=npos) const791{792C4_ASSERT(start == npos || (start >= 0 && start <= len));793if(start == npos)794start = len;795for(size_t i = start-1; i != size_t(-1); --i)796{797if(str[i] == c)798return i;799}800return npos;801}802803/** @return the first position where ANY of the chars is found in the string, or npos if none is found */804size_t first_of(ro_substr chars, size_t start=0) const805{806C4_ASSERT(start == npos || (start >= 0 && start <= len));807for(size_t i = start; i < len; ++i)808{809for(size_t j = 0; j < chars.len; ++j)810{811if(str[i] == chars[j])812return i;813}814}815return npos;816}817818/** @return the last position where ANY of the chars is found in the string, or npos if none is found */819size_t last_of(ro_substr chars, size_t start=npos) const820{821C4_ASSERT(start == npos || (start >= 0 && start <= len));822if(start == npos)823start = len;824for(size_t i = start-1; i != size_t(-1); --i)825{826for(size_t j = 0; j < chars.len; ++j)827{828if(str[i] == chars[j])829return i;830}831}832return npos;833}834835public:836837size_t first_not_of(const C c, size_t start=0) const838{839C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));840for(size_t i = start; i < len; ++i)841{842if(str[i] != c)843return i;844}845return npos;846}847848size_t last_not_of(const C c, size_t start=npos) const849{850C4_ASSERT(start == npos || (start >= 0 && start <= len));851if(start == npos)852start = len;853for(size_t i = start-1; i != size_t(-1); --i)854{855if(str[i] != c)856return i;857}858return npos;859}860861size_t first_not_of(ro_substr chars, size_t start=0) const862{863C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));864for(size_t i = start; i < len; ++i)865{866bool gotit = true;867for(size_t j = 0; j < chars.len; ++j)868{869if(str[i] == chars.str[j])870{871gotit = false;872break;873}874}875if(gotit)876{877return i;878}879}880return npos;881}882883size_t last_not_of(ro_substr chars, size_t start=npos) const884{885C4_ASSERT(start == npos || (start >= 0 && start <= len));886if(start == npos)887start = len;888for(size_t i = start-1; i != size_t(-1); --i)889{890bool gotit = true;891for(size_t j = 0; j < chars.len; ++j)892{893if(str[i] == chars.str[j])894{895gotit = false;896break;897}898}899if(gotit)900{901return i;902}903}904return npos;905}906907/** @} */908909public:910911/** @name Range lookup methods */912/** @{ */913914/** get the range delimited by an open-close pair of characters.915* @note There must be no nested pairs.916* @note No checks for escapes are performed. */917basic_substring pair_range(CC open, CC close) const918{919size_t b = find(open);920if(b == npos)921return basic_substring();922size_t e = find(close, b+1);923if(e == npos)924return basic_substring();925basic_substring ret = range(b, e+1);926C4_ASSERT(ret.sub(1).find(open) == npos);927return ret;928}929930/** get the range delimited by a single open-close character (eg, quotes).931* @note The open-close character can be escaped. */932basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))933{934size_t b = find(open_close);935if(b == npos) return basic_substring();936for(size_t i = b+1; i < len; ++i)937{938CC c = str[i];939if(c == open_close)940{941if(str[i-1] != escape)942{943return range(b, i+1);944}945}946}947return basic_substring();948}949950/** get the range delimited by an open-close pair of characters,951* with possibly nested occurrences. No checks for escapes are952* performed. */953basic_substring pair_range_nested(CC open, CC close) const954{955size_t b = find(open);956if(b == npos) return basic_substring();957size_t e, curr = b+1, count = 0;958const char both[] = {open, close, '\0'};959while((e = first_of(both, curr)) != npos)960{961if(str[e] == open)962{963++count;964curr = e+1;965}966else if(str[e] == close)967{968if(count == 0) return range(b, e+1);969--count;970curr = e+1;971}972}973return basic_substring();974}975976basic_substring unquoted() const977{978constexpr const C dq('"'), sq('\'');979if(len >= 2 && (str[len - 2] != C('\\')) &&980((begins_with(sq) && ends_with(sq))981||982(begins_with(dq) && ends_with(dq))))983{984return range(1, len -1);985}986return *this;987}988989/** @} */990991public:992993/** @name Number-matching query methods */994/** @{ */995996/** @return true if the substring contents are a floating-point or integer number.997* @note any leading or trailing whitespace will return false. */998bool is_number() const999{1000if(empty() || (first_non_empty_span().empty()))1001return false;1002if(first_uint_span() == *this)1003return true;1004if(first_int_span() == *this)1005return true;1006if(first_real_span() == *this)1007return true;1008return false;1009}10101011/** @return true if the substring contents are a real number.1012* @note any leading or trailing whitespace will return false. */1013bool is_real() const1014{1015if(empty() || (first_non_empty_span().empty()))1016return false;1017if(first_real_span() == *this)1018return true;1019return false;1020}10211022/** @return true if the substring contents are an integer number.1023* @note any leading or trailing whitespace will return false. */1024bool is_integer() const1025{1026if(empty() || (first_non_empty_span().empty()))1027return false;1028if(first_uint_span() == *this)1029return true;1030if(first_int_span() == *this)1031return true;1032return false;1033}10341035/** @return true if the substring contents are an unsigned integer number.1036* @note any leading or trailing whitespace will return false. */1037bool is_unsigned_integer() const1038{1039if(empty() || (first_non_empty_span().empty()))1040return false;1041if(first_uint_span() == *this)1042return true;1043return false;1044}10451046/** get the first span consisting exclusively of non-empty characters */1047basic_substring first_non_empty_span() const1048{1049constexpr const ro_substr empty_chars(" \n\r\t");1050size_t pos = first_not_of(empty_chars);1051if(pos == npos)1052return first(0);1053auto ret = sub(pos);1054pos = ret.first_of(empty_chars);1055return ret.first(pos);1056}10571058/** get the first span which can be interpreted as an unsigned integer */1059basic_substring first_uint_span() const1060{1061basic_substring ne = first_non_empty_span();1062if(ne.empty())1063return ne;1064if(ne.str[0] == '-')1065return first(0);1066size_t skip_start = size_t(ne.str[0] == '+');1067return ne._first_integral_span(skip_start);1068}10691070/** get the first span which can be interpreted as a signed integer */1071basic_substring first_int_span() const1072{1073basic_substring ne = first_non_empty_span();1074if(ne.empty())1075return ne;1076size_t skip_start = size_t(ne.str[0] == '+' || ne.str[0] == '-');1077return ne._first_integral_span(skip_start);1078}10791080basic_substring _first_integral_span(size_t skip_start) const1081{1082C4_ASSERT(!empty());1083if(skip_start == len)1084return first(0);1085C4_ASSERT(skip_start < len);1086if(len >= skip_start + 3)1087{1088if(str[skip_start] != '0')1089{1090for(size_t i = skip_start; i < len; ++i)1091{1092char c = str[i];1093if(c < '0' || c > '9')1094return i > skip_start && _is_delim_char(c) ? first(i) : first(0);1095}1096}1097else1098{1099char next = str[skip_start + 1];1100if(next == 'x' || next == 'X')1101{1102skip_start += 2;1103for(size_t i = skip_start; i < len; ++i)1104{1105const char c = str[i];1106if( ! _is_hex_char(c))1107return i > skip_start && _is_delim_char(c) ? first(i) : first(0);1108}1109return *this;1110}1111else if(next == 'b' || next == 'B')1112{1113skip_start += 2;1114for(size_t i = skip_start; i < len; ++i)1115{1116const char c = str[i];1117if(c != '0' && c != '1')1118return i > skip_start && _is_delim_char(c) ? first(i) : first(0);1119}1120return *this;1121}1122else if(next == 'o' || next == 'O')1123{1124skip_start += 2;1125for(size_t i = skip_start; i < len; ++i)1126{1127const char c = str[i];1128if(c < '0' || c > '7')1129return i > skip_start && _is_delim_char(c) ? first(i) : first(0);1130}1131return *this;1132}1133}1134}1135// must be a decimal, or it is not a an number1136for(size_t i = skip_start; i < len; ++i)1137{1138const char c = str[i];1139if(c < '0' || c > '9')1140return i > skip_start && _is_delim_char(c) ? first(i) : first(0);1141}1142return *this;1143}11441145/** get the first span which can be interpreted as a real (floating-point) number */1146basic_substring first_real_span() const1147{1148basic_substring ne = first_non_empty_span();1149if(ne.empty())1150return ne;1151size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-');1152C4_ASSERT(skip_start == 0 || skip_start == 1);1153// if we have at least three digits after the leading sign, it1154// can be decimal, or hex, or bin or oct. Ex:1155// non-decimal: 0x0, 0b0, 0o01156// decimal: 1.0, 10., 1e1, 100, inf, nan, infinity1157if(ne.len >= skip_start+3)1158{1159// if it does not have leading 0, it must be decimal, or it is not a real1160if(ne.str[skip_start] != '0')1161{1162if(ne.str[skip_start] == 'i') // is it infinity or inf?1163{1164basic_substring word = ne._word_follows(skip_start + 1, "nfinity");1165if(word.len)1166return word;1167return ne._word_follows(skip_start + 1, "nf");1168}1169else if(ne.str[skip_start] == 'n') // is it nan?1170{1171return ne._word_follows(skip_start + 1, "an");1172}1173else // must be a decimal, or it is not a real1174{1175return ne._first_real_span_dec(skip_start);1176}1177}1178else // starts with 0. is it 0x, 0b or 0o?1179{1180const char next = ne.str[skip_start + 1];1181// hexadecimal1182if(next == 'x' || next == 'X')1183return ne._first_real_span_hex(skip_start + 2);1184// binary1185else if(next == 'b' || next == 'B')1186return ne._first_real_span_bin(skip_start + 2);1187// octal1188else if(next == 'o' || next == 'O')1189return ne._first_real_span_oct(skip_start + 2);1190// none of the above. may still be a decimal.1191else1192return ne._first_real_span_dec(skip_start); // do not skip the 0.1193}1194}1195// less than 3 chars after the leading sign. It is either a1196// decimal or it is not a real. (cannot be any of 0x0, etc).1197return ne._first_real_span_dec(skip_start);1198}11991200/** true if the character is a delimiter character *at the end* */1201static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept1202{1203return c == ' ' || c == '\n'1204|| c == ']' || c == ')' || c == '}'1205|| c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0';1206}12071208/** true if the character is in [0-9a-fA-F] */1209static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept1210{1211return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');1212}12131214C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept1215{1216size_t posend = pos + word.len;1217if(len >= posend && sub(pos, word.len) == word)1218if(len == posend || _is_delim_char(str[posend]))1219return first(posend);1220return first(0);1221}12221223// this function is declared inside the class to avoid a VS error with __declspec(dllimport)1224C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept1225{1226bool intchars = false;1227bool fracchars = false;1228bool powchars;1229// integral part1230for( ; pos < len; ++pos)1231{1232const char c = str[pos];1233if(c >= '0' && c <= '9')1234{1235intchars = true;1236}1237else if(c == '.')1238{1239++pos;1240goto fractional_part_dec;1241}1242else if(c == 'e' || c == 'E')1243{1244++pos;1245goto power_part_dec;1246}1247else if(_is_delim_char(c))1248{1249return intchars ? first(pos) : first(0);1250}1251else1252{1253return first(0);1254}1255}1256// no . or p were found; this is either an integral number1257// or not a number at all1258return intchars ?1259*this :1260first(0);1261fractional_part_dec:1262C4_ASSERT(pos > 0);1263C4_ASSERT(str[pos - 1] == '.');1264for( ; pos < len; ++pos)1265{1266const char c = str[pos];1267if(c >= '0' && c <= '9')1268{1269fracchars = true;1270}1271else if(c == 'e' || c == 'E')1272{1273++pos;1274goto power_part_dec;1275}1276else if(_is_delim_char(c))1277{1278return intchars || fracchars ? first(pos) : first(0);1279}1280else1281{1282return first(0);1283}1284}1285return intchars || fracchars ?1286*this :1287first(0);1288power_part_dec:1289C4_ASSERT(pos > 0);1290C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E');1291// either a + or a - is expected here, followed by more chars.1292// also, using (pos+1) in this check will cause an early1293// return when no more chars follow the sign.1294if(len <= (pos+1) || ((!intchars) && (!fracchars)))1295return first(0);1296++pos; // this was the sign.1297// ... so the (pos+1) ensures that we enter the loop and1298// hence that there exist chars in the power part1299powchars = false;1300for( ; pos < len; ++pos)1301{1302const char c = str[pos];1303if(c >= '0' && c <= '9')1304powchars = true;1305else if(powchars && _is_delim_char(c))1306return first(pos);1307else1308return first(0);1309}1310return *this;1311}13121313// this function is declared inside the class to avoid a VS error with __declspec(dllimport)1314C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept1315{1316bool intchars = false;1317bool fracchars = false;1318bool powchars;1319// integral part1320for( ; pos < len; ++pos)1321{1322const char c = str[pos];1323if(_is_hex_char(c))1324{1325intchars = true;1326}1327else if(c == '.')1328{1329++pos;1330goto fractional_part_hex;1331}1332else if(c == 'p' || c == 'P')1333{1334++pos;1335goto power_part_hex;1336}1337else if(_is_delim_char(c))1338{1339return intchars ? first(pos) : first(0);1340}1341else1342{1343return first(0);1344}1345}1346// no . or p were found; this is either an integral number1347// or not a number at all1348return intchars ?1349*this :1350first(0);1351fractional_part_hex:1352C4_ASSERT(pos > 0);1353C4_ASSERT(str[pos - 1] == '.');1354for( ; pos < len; ++pos)1355{1356const char c = str[pos];1357if(_is_hex_char(c))1358{1359fracchars = true;1360}1361else if(c == 'p' || c == 'P')1362{1363++pos;1364goto power_part_hex;1365}1366else if(_is_delim_char(c))1367{1368return intchars || fracchars ? first(pos) : first(0);1369}1370else1371{1372return first(0);1373}1374}1375return intchars || fracchars ?1376*this :1377first(0);1378power_part_hex:1379C4_ASSERT(pos > 0);1380C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');1381// either a + or a - is expected here, followed by more chars.1382// also, using (pos+1) in this check will cause an early1383// return when no more chars follow the sign.1384if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))1385return first(0);1386++pos; // this was the sign.1387// ... so the (pos+1) ensures that we enter the loop and1388// hence that there exist chars in the power part1389powchars = false;1390for( ; pos < len; ++pos)1391{1392const char c = str[pos];1393if(c >= '0' && c <= '9')1394powchars = true;1395else if(powchars && _is_delim_char(c))1396return first(pos);1397else1398return first(0);1399}1400return *this;1401}14021403// this function is declared inside the class to avoid a VS error with __declspec(dllimport)1404C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept1405{1406bool intchars = false;1407bool fracchars = false;1408bool powchars;1409// integral part1410for( ; pos < len; ++pos)1411{1412const char c = str[pos];1413if(c == '0' || c == '1')1414{1415intchars = true;1416}1417else if(c == '.')1418{1419++pos;1420goto fractional_part_bin;1421}1422else if(c == 'p' || c == 'P')1423{1424++pos;1425goto power_part_bin;1426}1427else if(_is_delim_char(c))1428{1429return intchars ? first(pos) : first(0);1430}1431else1432{1433return first(0);1434}1435}1436// no . or p were found; this is either an integral number1437// or not a number at all1438return intchars ?1439*this :1440first(0);1441fractional_part_bin:1442C4_ASSERT(pos > 0);1443C4_ASSERT(str[pos - 1] == '.');1444for( ; pos < len; ++pos)1445{1446const char c = str[pos];1447if(c == '0' || c == '1')1448{1449fracchars = true;1450}1451else if(c == 'p' || c == 'P')1452{1453++pos;1454goto power_part_bin;1455}1456else if(_is_delim_char(c))1457{1458return intchars || fracchars ? first(pos) : first(0);1459}1460else1461{1462return first(0);1463}1464}1465return intchars || fracchars ?1466*this :1467first(0);1468power_part_bin:1469C4_ASSERT(pos > 0);1470C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');1471// either a + or a - is expected here, followed by more chars.1472// also, using (pos+1) in this check will cause an early1473// return when no more chars follow the sign.1474if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))1475return first(0);1476++pos; // this was the sign.1477// ... so the (pos+1) ensures that we enter the loop and1478// hence that there exist chars in the power part1479powchars = false;1480for( ; pos < len; ++pos)1481{1482const char c = str[pos];1483if(c >= '0' && c <= '9')1484powchars = true;1485else if(powchars && _is_delim_char(c))1486return first(pos);1487else1488return first(0);1489}1490return *this;1491}14921493// this function is declared inside the class to avoid a VS error with __declspec(dllimport)1494C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept1495{1496bool intchars = false;1497bool fracchars = false;1498bool powchars;1499// integral part1500for( ; pos < len; ++pos)1501{1502const char c = str[pos];1503if(c >= '0' && c <= '7')1504{1505intchars = true;1506}1507else if(c == '.')1508{1509++pos;1510goto fractional_part_oct;1511}1512else if(c == 'p' || c == 'P')1513{1514++pos;1515goto power_part_oct;1516}1517else if(_is_delim_char(c))1518{1519return intchars ? first(pos) : first(0);1520}1521else1522{1523return first(0);1524}1525}1526// no . or p were found; this is either an integral number1527// or not a number at all1528return intchars ?1529*this :1530first(0);1531fractional_part_oct:1532C4_ASSERT(pos > 0);1533C4_ASSERT(str[pos - 1] == '.');1534for( ; pos < len; ++pos)1535{1536const char c = str[pos];1537if(c >= '0' && c <= '7')1538{1539fracchars = true;1540}1541else if(c == 'p' || c == 'P')1542{1543++pos;1544goto power_part_oct;1545}1546else if(_is_delim_char(c))1547{1548return intchars || fracchars ? first(pos) : first(0);1549}1550else1551{1552return first(0);1553}1554}1555return intchars || fracchars ?1556*this :1557first(0);1558power_part_oct:1559C4_ASSERT(pos > 0);1560C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');1561// either a + or a - is expected here, followed by more chars.1562// also, using (pos+1) in this check will cause an early1563// return when no more chars follow the sign.1564if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))1565return first(0);1566++pos; // this was the sign.1567// ... so the (pos+1) ensures that we enter the loop and1568// hence that there exist chars in the power part1569powchars = false;1570for( ; pos < len; ++pos)1571{1572const char c = str[pos];1573if(c >= '0' && c <= '9')1574powchars = true;1575else if(powchars && _is_delim_char(c))1576return first(pos);1577else1578return first(0);1579}1580return *this;1581}15821583/** @} */15841585public:15861587/** @name Splitting methods */1588/** @{ */15891590/** returns true if the string has not been exhausted yet, meaning1591* it's ok to call next_split() again. When no instance of sep1592* exists in the string, returns the full string. When the input1593* is an empty string, the output string is the empty string. */1594bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const1595{1596if(C4_LIKELY(*start_pos < len))1597{1598for(size_t i = *start_pos; i < len; i++)1599{1600if(str[i] == sep)1601{1602out->assign(str + *start_pos, i - *start_pos);1603*start_pos = i+1;1604return true;1605}1606}1607out->assign(str + *start_pos, len - *start_pos);1608*start_pos = len + 1;1609return true;1610}1611else1612{1613bool valid = len > 0 && (*start_pos == len);1614if(valid && str && str[len-1] == sep)1615{1616out->assign(str + len, size_t(0)); // the cast is needed to prevent overload ambiguity1617}1618else1619{1620out->assign(str + len + 1, size_t(0)); // the cast is needed to prevent overload ambiguity1621}1622*start_pos = len + 1;1623return valid;1624}1625}16261627private:16281629struct split_proxy_impl1630{1631struct split_iterator_impl1632{1633split_proxy_impl const* m_proxy;1634basic_substring m_str;1635size_t m_pos;1636NCC_ m_sep;16371638split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep)1639: m_proxy(proxy), m_pos(pos), m_sep(sep)1640{1641_tick();1642}16431644void _tick()1645{1646m_proxy->m_str.next_split(m_sep, &m_pos, &m_str);1647}16481649split_iterator_impl& operator++ () { _tick(); return *this; }1650split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; }16511652basic_substring& operator* () { return m_str; }1653basic_substring* operator-> () { return &m_str; }16541655bool operator!= (split_iterator_impl const& that) const1656{1657return !(this->operator==(that));1658}1659bool operator== (split_iterator_impl const& that) const1660{1661C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators");1662if(m_str.size() != that.m_str.size())1663return false;1664if(m_str.data() != that.m_str.data())1665return false;1666return m_pos == that.m_pos;1667}1668};16691670basic_substring m_str;1671size_t m_start_pos;1672C m_sep;16731674split_proxy_impl(basic_substring str_, size_t start_pos, C sep)1675: m_str(str_), m_start_pos(start_pos), m_sep(sep)1676{1677}16781679split_iterator_impl begin() const1680{1681auto it = split_iterator_impl(this, m_start_pos, m_sep);1682return it;1683}1684split_iterator_impl end() const1685{1686size_t pos = m_str.size() + 1;1687auto it = split_iterator_impl(this, pos, m_sep);1688return it;1689}1690};16911692public:16931694using split_proxy = split_proxy_impl;16951696/** a view into the splits */1697split_proxy split(C sep, size_t start_pos=0) const1698{1699C4_XASSERT((start_pos >= 0 && start_pos < len) || empty());1700auto ss = sub(0, len);1701auto it = split_proxy(ss, start_pos, sep);1702return it;1703}17041705public:17061707/** pop right: return the first split from the right. Use1708* gpop_left() to get the reciprocal part.1709*/1710basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const1711{1712if(C4_LIKELY(len > 1))1713{1714auto pos = last_of(sep);1715if(pos != npos)1716{1717if(pos + 1 < len) // does not end with sep1718{1719return sub(pos + 1); // return from sep to end1720}1721else // the string ends with sep1722{1723if( ! skip_empty)1724{1725return sub(pos + 1, 0);1726}1727auto ppos = last_not_of(sep); // skip repeated seps1728if(ppos == npos) // the string is all made of seps1729{1730return sub(0, 0);1731}1732// find the previous sep1733auto pos0 = last_of(sep, ppos);1734if(pos0 == npos) // only the last sep exists1735{1736return sub(0); // return the full string (because skip_empty is true)1737}1738++pos0;1739return sub(pos0);1740}1741}1742else // no sep was found, return the full string1743{1744return *this;1745}1746}1747else if(len == 1)1748{1749if(begins_with(sep))1750{1751return sub(0, 0);1752}1753return *this;1754}1755else // an empty string1756{1757return basic_substring();1758}1759}17601761/** return the first split from the left. Use gpop_right() to get1762* the reciprocal part. */1763basic_substring pop_left(C sep = C('/'), bool skip_empty=false) const1764{1765if(C4_LIKELY(len > 1))1766{1767auto pos = first_of(sep);1768if(pos != npos)1769{1770if(pos > 0) // does not start with sep1771{1772return sub(0, pos); // return everything up to it1773}1774else // the string starts with sep1775{1776if( ! skip_empty)1777{1778return sub(0, 0);1779}1780auto ppos = first_not_of(sep); // skip repeated seps1781if(ppos == npos) // the string is all made of seps1782{1783return sub(0, 0);1784}1785// find the next sep1786auto pos0 = first_of(sep, ppos);1787if(pos0 == npos) // only the first sep exists1788{1789return sub(0); // return the full string (because skip_empty is true)1790}1791C4_XASSERT(pos0 > 0);1792// return everything up to the second sep1793return sub(0, pos0);1794}1795}1796else // no sep was found, return the full string1797{1798return sub(0);1799}1800}1801else if(len == 1)1802{1803if(begins_with(sep))1804{1805return sub(0, 0);1806}1807return sub(0);1808}1809else // an empty string1810{1811return basic_substring();1812}1813}18141815public:18161817/** greedy pop left. eg, csubstr("a/b/c").gpop_left('/')="c" */1818basic_substring gpop_left(C sep = C('/'), bool skip_empty=false) const1819{1820auto ss = pop_right(sep, skip_empty);1821ss = left_of(ss);1822if(ss.find(sep) != npos)1823{1824if(ss.ends_with(sep))1825{1826if(skip_empty)1827{1828ss = ss.trimr(sep);1829}1830else1831{1832ss = ss.sub(0, ss.len-1); // safe to subtract because ends_with(sep) is true1833}1834}1835}1836return ss;1837}18381839/** greedy pop right. eg, csubstr("a/b/c").gpop_right('/')="a" */1840basic_substring gpop_right(C sep = C('/'), bool skip_empty=false) const1841{1842auto ss = pop_left(sep, skip_empty);1843ss = right_of(ss);1844if(ss.find(sep) != npos)1845{1846if(ss.begins_with(sep))1847{1848if(skip_empty)1849{1850ss = ss.triml(sep);1851}1852else1853{1854ss = ss.sub(1);1855}1856}1857}1858return ss;1859}18601861/** @} */18621863public:18641865/** @name Path-like manipulation methods */1866/** @{ */18671868basic_substring basename(C sep=C('/')) const1869{1870auto ss = pop_right(sep, /*skip_empty*/true);1871ss = ss.trimr(sep);1872return ss;1873}18741875basic_substring dirname(C sep=C('/')) const1876{1877auto ss = basename(sep);1878ss = ss.empty() ? *this : left_of(ss);1879return ss;1880}18811882C4_ALWAYS_INLINE basic_substring name_wo_extshort() const1883{1884return gpop_left('.');1885}18861887C4_ALWAYS_INLINE basic_substring name_wo_extlong() const1888{1889return pop_left('.');1890}18911892C4_ALWAYS_INLINE basic_substring extshort() const1893{1894return pop_right('.');1895}18961897C4_ALWAYS_INLINE basic_substring extlong() const1898{1899return gpop_right('.');1900}19011902/** @} */19031904public:19051906/** @name Content-modification methods (only for non-const C) */1907/** @{ */19081909/** convert the string to upper-case1910* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1911C4_REQUIRE_RW(void) toupper()1912{1913for(size_t i = 0; i < len; ++i)1914{1915str[i] = static_cast<C>(::toupper(str[i]));1916}1917}19181919/** convert the string to lower-case1920* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1921C4_REQUIRE_RW(void) tolower()1922{1923for(size_t i = 0; i < len; ++i)1924{1925str[i] = static_cast<C>(::tolower(str[i]));1926}1927}19281929public:19301931/** fill the entire contents with the given @p val1932* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1933C4_REQUIRE_RW(void) fill(C val)1934{1935for(size_t i = 0; i < len; ++i)1936{1937str[i] = val;1938}1939}19401941public:19421943/** set the current substring to a copy of the given csubstr1944* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1945C4_REQUIRE_RW(void) copy_from(ro_substr that, size_t ifirst=0, size_t num=npos)1946{1947C4_ASSERT(ifirst >= 0 && ifirst <= len);1948num = num != npos ? num : len - ifirst;1949num = num < that.len ? num : that.len;1950C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);1951// calling memcpy with null strings is undefined behavior1952// and will wreak havoc in calling code's branches.1953// see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-12621336371954if(num)1955memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num);1956}19571958public:19591960/** reverse in place1961* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1962C4_REQUIRE_RW(void) reverse()1963{1964if(len == 0) return;1965detail::_do_reverse(str, str + len - 1);1966}19671968/** revert a subpart in place1969* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1970C4_REQUIRE_RW(void) reverse_sub(size_t ifirst, size_t num)1971{1972C4_ASSERT(ifirst >= 0 && ifirst <= len);1973C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);1974if(num == 0) return;1975detail::_do_reverse(str + ifirst, str + ifirst + num - 1);1976}19771978/** revert a range in place1979* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1980C4_REQUIRE_RW(void) reverse_range(size_t ifirst, size_t ilast)1981{1982C4_ASSERT(ifirst >= 0 && ifirst <= len);1983C4_ASSERT(ilast >= 0 && ilast <= len);1984if(ifirst == ilast) return;1985detail::_do_reverse(str + ifirst, str + ilast - 1);1986}19871988public:19891990/** erase part of the string. eg, with char s[] = "0123456789",1991* substr(s).erase(3, 2) = "01256789", and s is now "01245678989"1992* @note this method requires that the string memory is writeable and is SFINAEd out for const C */1993C4_REQUIRE_RW(basic_substring) erase(size_t pos, size_t num)1994{1995C4_ASSERT(pos >= 0 && pos+num <= len);1996size_t num_to_move = len - pos - num;1997memmove(str + pos, str + pos + num, sizeof(C) * num_to_move);1998return basic_substring{str, len - num};1999}20002001/** @note this method requires that the string memory is writeable and is SFINAEd out for const C */2002C4_REQUIRE_RW(basic_substring) erase_range(size_t first, size_t last)2003{2004C4_ASSERT(first <= last);2005return erase(first, static_cast<size_t>(last-first));2006}20072008/** erase a part of the string.2009* @note @p sub must be a substring of this string2010* @note this method requires that the string memory is writeable and is SFINAEd out for const C */2011C4_REQUIRE_RW(basic_substring) erase(ro_substr sub)2012{2013C4_ASSERT(is_super(sub));2014C4_ASSERT(sub.str >= str);2015return erase(static_cast<size_t>(sub.str - str), sub.len);2016}20172018public:20192020/** replace every occurrence of character @p value with the character @p repl2021* @return the number of characters that were replaced2022* @note this method requires that the string memory is writeable and is SFINAEd out for const C */2023C4_REQUIRE_RW(size_t) replace(C value, C repl, size_t pos=0)2024{2025C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);2026size_t did_it = 0;2027while((pos = find(value, pos)) != npos)2028{2029str[pos++] = repl;2030++did_it;2031}2032return did_it;2033}20342035/** replace every occurrence of each character in @p value with2036* the character @p repl.2037* @return the number of characters that were replaced2038* @note this method requires that the string memory is writeable and is SFINAEd out for const C */2039C4_REQUIRE_RW(size_t) replace(ro_substr chars, C repl, size_t pos=0)2040{2041C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);2042size_t did_it = 0;2043while((pos = first_of(chars, pos)) != npos)2044{2045str[pos++] = repl;2046++did_it;2047}2048return did_it;2049}20502051/** replace @p pattern with @p repl, and write the result into2052* @dst. pattern and repl don't need equal sizes.2053*2054* @return the required size for dst. No overflow occurs if2055* dst.len is smaller than the required size; this can be used to2056* determine the required size for an existing container. */2057size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const2058{2059C4_ASSERT( ! pattern.empty()); //!< @todo relax this precondition2060C4_ASSERT( ! this ->overlaps(dst)); //!< @todo relax this precondition2061C4_ASSERT( ! pattern.overlaps(dst));2062C4_ASSERT( ! repl .overlaps(dst));2063C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);2064C4_SUPPRESS_WARNING_GCC_PUSH2065C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc11 has a false positive here2066#if (!defined(__clang__)) && (defined(__GNUC__) && (__GNUC__ >= 7))2067C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc11 has a false positive here2068#endif2069#define _c4append(first, last) \2070{ \2071C4_ASSERT((last) >= (first)); \2072size_t num = static_cast<size_t>((last) - (first)); \2073if(num > 0 && sz + num <= dst.len) \2074{ \2075memcpy(dst.str + sz, first, num * sizeof(C)); \2076} \2077sz += num; \2078}2079size_t sz = 0;2080size_t b = pos;2081_c4append(str, str + pos);2082do {2083size_t e = find(pattern, b);2084if(e == npos)2085{2086_c4append(str + b, str + len);2087break;2088}2089_c4append(str + b, str + e);2090_c4append(repl.begin(), repl.end());2091b = e + pattern.size();2092} while(b < len && b != npos);2093return sz;2094#undef _c4append2095C4_SUPPRESS_WARNING_GCC_POP2096}20972098/** @} */20992100}; // template class basic_substring210121022103#undef C4_REQUIRE_RW210421052106//-----------------------------------------------------------------------------2107//-----------------------------------------------------------------------------2108//-----------------------------------------------------------------------------210921102111/** @name Adapter functions. to_substr() and to_csubstr() is used in2112* generic code like format(), and allow adding construction of2113* substrings from new types like containers. */2114/** @{ */211521162117/** neutral version for use in generic code */2118C4_ALWAYS_INLINE substr to_substr(substr s) noexcept { return s; }2119/** neutral version for use in generic code */2120C4_ALWAYS_INLINE csubstr to_csubstr(substr s) noexcept { return s; }2121/** neutral version for use in generic code */2122C4_ALWAYS_INLINE csubstr to_csubstr(csubstr s) noexcept { return s; }212321242125template<size_t N>2126C4_ALWAYS_INLINE substr2127to_substr(char (&s)[N]) noexcept { substr ss(s, N-1); return ss; }2128template<size_t N>2129C4_ALWAYS_INLINE csubstr2130to_csubstr(const char (&s)[N]) noexcept { csubstr ss(s, N-1); return ss; }213121322133/** @note this overload uses SFINAE to prevent it from overriding the array overload2134* @see For a more detailed explanation on why the plain overloads cannot2135* coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */2136template<class U>2137C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, char*>::value, substr>::type2138to_substr(U s) noexcept { substr ss(s); return ss; }2139/** @note this overload uses SFINAE to prevent it from overriding the array overload2140* @see For a more detailed explanation on why the plain overloads cannot2141* coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */2142template<class U>2143C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, const char*>::value || std::is_same<U, char*>::value, csubstr>::type2144to_csubstr(U s) noexcept { csubstr ss(s); return ss; }214521462147/** @} */214821492150//-----------------------------------------------------------------------------2151//-----------------------------------------------------------------------------2152//-----------------------------------------------------------------------------21532154template<typename C, size_t N> inline bool operator== (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) == 0; }2155template<typename C, size_t N> inline bool operator!= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) != 0; }2156template<typename C, size_t N> inline bool operator< (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) > 0; }2157template<typename C, size_t N> inline bool operator> (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) < 0; }2158template<typename C, size_t N> inline bool operator<= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) >= 0; }2159template<typename C, size_t N> inline bool operator>= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) <= 0; }21602161template<typename C> inline bool operator== (const char c, basic_substring<C> const that) noexcept { return that.compare(c) == 0; }2162template<typename C> inline bool operator!= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) != 0; }2163template<typename C> inline bool operator< (const char c, basic_substring<C> const that) noexcept { return that.compare(c) > 0; }2164template<typename C> inline bool operator> (const char c, basic_substring<C> const that) noexcept { return that.compare(c) < 0; }2165template<typename C> inline bool operator<= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) >= 0; }2166template<typename C> inline bool operator>= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) <= 0; }216721682169//-----------------------------------------------------------------------------2170//-----------------------------------------------------------------------------2171//-----------------------------------------------------------------------------21722173/** @define C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with2174* template operator<<2175* @see https://github.com/onqtam/doctest/pull/431 */2176#ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT2177#ifdef __clang__2178# pragma clang diagnostic push2179# pragma clang diagnostic ignored "-Wsign-conversion"2180#elif defined(__GNUC__)2181# pragma GCC diagnostic push2182# pragma GCC diagnostic ignored "-Wsign-conversion"2183#endif21842185/** output the string to a stream */2186template<class OStream, class C>2187inline OStream& operator<< (OStream& os, basic_substring<C> s)2188{2189os.write(s.str, s.len);2190return os;2191}21922193// this causes ambiguity2194///** this is used by google test */2195//template<class OStream, class C>2196//inline void PrintTo(basic_substring<C> s, OStream* os)2197//{2198// os->write(s.str, s.len);2199//}22002201#ifdef __clang__2202# pragma clang diagnostic pop2203#elif defined(__GNUC__)2204# pragma GCC diagnostic pop2205#endif2206#endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT22072208} // namespace c4220922102211#ifdef __clang__2212# pragma clang diagnostic pop2213#elif defined(__GNUC__)2214# pragma GCC diagnostic pop2215#endif22162217#endif /* _C4_SUBSTR_HPP_ */221822192220