// Path: blob/master/dep/rapidyaml/src/c4/yml/parse.cpp
// 4262 views
#include "c4/yml/parse.hpp"
#include "c4/error.hpp"
#include "c4/utf.hpp"
#include <c4/dump.hpp>

#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h> // memcpy
#include <utility>  // std::move

#include "c4/yml/detail/parser_dbg.hpp"
#ifdef RYML_DBG
#include "c4/yml/detail/print.hpp"
#endif

#ifndef RYML_ERRMSG_SIZE
#define RYML_ERRMSG_SIZE 1024
#endif

//#define RYML_WITH_TAB_TOKENS
#ifdef RYML_WITH_TAB_TOKENS
#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
#else
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
#endif


#if defined(_MSC_VER)
#   pragma warning(push)
#   pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
#elif defined(__clang__)
#   pragma clang diagnostic push
#   pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
#   pragma clang diagnostic ignored "-Wformat-nonliteral"
#   pragma clang diagnostic ignored "-Wold-style-cast"
#elif defined(__GNUC__)
#   pragma GCC diagnostic push
#   pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
#   pragma GCC diagnostic ignored "-Wformat-nonliteral"
#   pragma GCC diagnostic ignored "-Wold-style-cast"
#   if __GNUC__ >= 7
#       pragma GCC diagnostic ignored "-Wduplicated-branches"
#   endif
#endif

namespace c4 {
namespace yml {

namespace {

/** Format @p fmt with @p args and hand the pieces to @p dumpfn, using a
 * small stack buffer. When an argument does not fit the buffer, the
 * dump is resumed (at most twice) from where it stopped.
 *
 * The arguments are deliberately passed on as lvalues: the same pack
 * is used for up to three calls, so it must never be forwarded
 * (and thereby potentially moved-from) more than once. */
template<class DumpFn, class ...Args>
void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args)
{
    char writebuf[256];
    auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, args...);
    // resume writing if the results failed to fit the buffer
    if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte.
    {
        results = format_dump_resume(dumpfn, results, writebuf, fmt, args...);
        if(C4_UNLIKELY(results.bufsize > sizeof(writebuf)))
        {
            results = format_dump_resume(dumpfn, results, writebuf, fmt, args...);
        }
    }
}

/** @return false when @p s starts with one of the tokens tested below
 * (map/seq/flow/comment/directive/anchor indicators), true otherwise */
bool _is_scalar_next__runk(csubstr s)
{
    return !(s.begins_with(": ") || s.begins_with_any("#,{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ") || s.begins_with(":\"") || s.begins_with(":'"));
}

/** @return false when @p s starts with a flow opener, tag, anchor,
 * complex-key or sequence-entry token, true otherwise */
bool _is_scalar_next__rseq_rval(csubstr s)
{
    return !(s.begins_with_any("[{!&") || s.begins_with("? ") || s.begins_with("- ") || s == "-");
}

/** @return false when @p s starts with ": ", "? " or any of "#,!&"
 * (and ":\t" when tab tokens are enabled), true otherwise */
bool _is_scalar_next__rmap(csubstr s)
{
    return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? ") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t")));
}

/** @return false when @p s starts with a sequence entry or a flow
 * opener, true otherwise */
bool _is_scalar_next__rmap_val(csubstr s)
{
    return !(s.begins_with("- ") || s.begins_with_any("{[") || s == "-");
}

/** @return true when @p s is exactly "---" or "...", or starts with
 * one of them followed by a space or tab */
bool _is_doc_sep(csubstr s)
{
    constexpr const csubstr dashes = "---";
    constexpr const csubstr ellipsis = "...";
    constexpr const csubstr whitesp = " \t";
    if(s.begins_with(dashes))
        return s == dashes || s.sub(3).begins_with_any(whitesp);
    else if(s.begins_with(ellipsis))
        return s == ellipsis || s.sub(3).begins_with_any(whitesp);
    return false;
}

/** @p i is set to the first non whitespace character after the line
 * @return the number of empty lines after the initial position */
size_t count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
{
    RYML_ASSERT(r[*i] == '\n');
    size_t numnl_following = 0;
    ++(*i);
    for( ; *i < r.len; ++(*i))
    {
        if(r.str[*i] == '\n')
        {
            ++numnl_following;
            if(indentation) // skip the indentation after the newline
            {
                size_t stop = *i + indentation;
                for( ; *i < r.len; ++(*i))
                {
                    if(r.str[*i] != ' ' && r.str[*i] != '\r')
                        break;
                    RYML_ASSERT(*i < stop);
                }
                C4_UNUSED(stop);
            }
        }
        else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')  // skip leading whitespace
            ;
        else
            break;
    }
    return numnl_following;
}

} // anon namespace


//-----------------------------------------------------------------------------

/** Releases the owned buffers and then zeroes every member. */
Parser::~Parser()
{
    _free();
    _clr();
}

/** Construct an idle parser using the callbacks @p cb for the state
 * stack; a single default State is pushed and made current. */
Parser::Parser(Callbacks const& cb, ParserOptions opts)
    : m_options(opts)
    , m_file()
    , m_buf()
    , m_root_id(NONE)
    , m_tree()
    , m_stack(cb)
    , m_state()
    , m_key_tag_indentation(0)
    , m_key_tag2_indentation(0)
    , m_key_tag()
    , m_key_tag2()
    , m_val_tag_indentation(0)
    , m_val_tag()
    , m_key_anchor_was_before(false)
    , m_key_anchor_indentation(0)
    , m_key_anchor()
    , m_val_anchor_indentation(0)
    , m_val_anchor()
    , m_filter_arena()
    , m_newline_offsets()
    , m_newline_offsets_size(0)
    , m_newline_offsets_capacity(0)
    , m_newline_offsets_buf()
{
    m_stack.push(State{});
    m_state = &m_stack.top();
}

/** Move constructor: takes over the buffers of @p that and leaves
 * @p that cleared (see _clr()). */
Parser::Parser(Parser &&that)
    : m_options(that.m_options)
    , m_file(that.m_file)
    , m_buf(that.m_buf)
    , m_root_id(that.m_root_id)
    , m_tree(that.m_tree)
    , m_stack(std::move(that.m_stack))
    , m_state(&m_stack.top())
    , m_key_tag_indentation(that.m_key_tag_indentation)
    , m_key_tag2_indentation(that.m_key_tag2_indentation)
    , m_key_tag(that.m_key_tag)
    , m_key_tag2(that.m_key_tag2)
    , m_val_tag_indentation(that.m_val_tag_indentation)
    , m_val_tag(that.m_val_tag)
    , m_key_anchor_was_before(that.m_key_anchor_was_before)
    , m_key_anchor_indentation(that.m_key_anchor_indentation)
    , m_key_anchor(that.m_key_anchor)
    , m_val_anchor_indentation(that.m_val_anchor_indentation)
    , m_val_anchor(that.m_val_anchor)
    , m_filter_arena(that.m_filter_arena)
    , m_newline_offsets(that.m_newline_offsets)
    , m_newline_offsets_size(that.m_newline_offsets_size)
    , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
    , m_newline_offsets_buf(that.m_newline_offsets_buf)
{
    that._clr();
}

/** Copy constructor: copies the parse state and allocates fresh
 * buffers for the newline offsets (contents copied) and for the filter
 * arena (same size; contents are not copied). */
Parser::Parser(Parser const& that)
    : m_options(that.m_options)
    , m_file(that.m_file)
    , m_buf(that.m_buf)
    , m_root_id(that.m_root_id)
    , m_tree(that.m_tree)
    , m_stack(that.m_stack)
    , m_state(&m_stack.top())
    , m_key_tag_indentation(that.m_key_tag_indentation)
    , m_key_tag2_indentation(that.m_key_tag2_indentation)
    , m_key_tag(that.m_key_tag)
    , m_key_tag2(that.m_key_tag2)
    , m_val_tag_indentation(that.m_val_tag_indentation)
    , m_val_tag(that.m_val_tag)
    , m_key_anchor_was_before(that.m_key_anchor_was_before)
    , m_key_anchor_indentation(that.m_key_anchor_indentation)
    , m_key_anchor(that.m_key_anchor)
    , m_val_anchor_indentation(that.m_val_anchor_indentation)
    , m_val_anchor(that.m_val_anchor)
    , m_filter_arena()
    , m_newline_offsets()
    , m_newline_offsets_size()
    , m_newline_offsets_capacity()
    , m_newline_offsets_buf()
{
    // NOTE(review): unlike the copy assignment, m_newline_offsets_buf is
    // left empty here -- confirm whether that asymmetry is intended.
    if(that.m_newline_offsets_capacity)
    {
        _resize_locations(that.m_newline_offsets_capacity);
        _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
        memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
        m_newline_offsets_size = that.m_newline_offsets_size;
    }
    if(that.m_filter_arena.len)
    {
        _resize_filter_arena(that.m_filter_arena.len);
    }
}

/** Move assignment: frees the current buffers, takes over those of
 * @p that and leaves @p that cleared. Self-assignment is a no-op. */
Parser& Parser::operator=(Parser &&that)
{
    // _free() would otherwise destroy the very state we are about to read
    if(this == &that)
        return *this;
    _free();
    m_options = (that.m_options);
    m_file = (that.m_file);
    m_buf = (that.m_buf);
    m_root_id = (that.m_root_id);
    m_tree = (that.m_tree);
    m_stack = std::move(that.m_stack);
    m_state = (&m_stack.top());
    m_key_tag_indentation = (that.m_key_tag_indentation);
    m_key_tag2_indentation = (that.m_key_tag2_indentation);
    m_key_tag = (that.m_key_tag);
    m_key_tag2 = (that.m_key_tag2);
    m_val_tag_indentation = (that.m_val_tag_indentation);
    m_val_tag = (that.m_val_tag);
    m_key_anchor_was_before = (that.m_key_anchor_was_before);
    m_key_anchor_indentation = (that.m_key_anchor_indentation);
    m_key_anchor = (that.m_key_anchor);
    m_val_anchor_indentation = (that.m_val_anchor_indentation);
    m_val_anchor = (that.m_val_anchor);
    m_filter_arena = that.m_filter_arena;
    m_newline_offsets = (that.m_newline_offsets);
    m_newline_offsets_size = (that.m_newline_offsets_size);
    m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
    m_newline_offsets_buf = (that.m_newline_offsets_buf);
    that._clr();
    return *this;
}

/** Copy assignment: frees the current buffers, copies the parse state
 * of @p that and allocates fresh buffers for the newline offsets
 * (contents copied) and the filter arena (same size). Self-assignment
 * is a no-op. */
Parser& Parser::operator=(Parser const& that)
{
    // _free() would otherwise destroy the very state we are about to copy
    if(this == &that)
        return *this;
    _free();
    m_options = (that.m_options);
    m_file = (that.m_file);
    m_buf = (that.m_buf);
    m_root_id = (that.m_root_id);
    m_tree = (that.m_tree);
    m_stack = that.m_stack;
    m_state = &m_stack.top();
    m_key_tag_indentation = (that.m_key_tag_indentation);
    m_key_tag2_indentation = (that.m_key_tag2_indentation);
    m_key_tag = (that.m_key_tag);
    m_key_tag2 = (that.m_key_tag2);
    m_val_tag_indentation = (that.m_val_tag_indentation);
    m_val_tag = (that.m_val_tag);
    m_key_anchor_was_before = (that.m_key_anchor_was_before);
    m_key_anchor_indentation = (that.m_key_anchor_indentation);
    m_key_anchor = (that.m_key_anchor);
    m_val_anchor_indentation = (that.m_val_anchor_indentation);
    m_val_anchor = (that.m_val_anchor);
    if(that.m_filter_arena.len > 0)
        _resize_filter_arena(that.m_filter_arena.len);
    if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
        _resize_locations(that.m_newline_offsets_capacity);
    _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
    _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
    // when there is nothing to copy both pointers may be null, which
    // memcpy does not accept even for a zero size
    if(that.m_newline_offsets_size)
        memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
    m_newline_offsets_size = that.m_newline_offsets_size;
    m_newline_offsets_buf = that.m_newline_offsets_buf;
    return *this;
}

/** Reset every member to its value-initialized state. Does not
 * release memory: call _free() first when buffers are owned. */
void Parser::_clr()
{
    m_options = {};
    m_file = {};
    m_buf = {};
    m_root_id = {};
    m_tree = {};
    m_stack.clear();
    m_state = {};
    m_key_tag_indentation = {};
    m_key_tag2_indentation = {};
    m_key_tag = {};
    m_key_tag2 = {};
    m_val_tag_indentation = {};
    m_val_tag = {};
    m_key_anchor_was_before = {};
    m_key_anchor_indentation = {};
    m_key_anchor = {};
    m_val_anchor_indentation = {};
    m_val_anchor = {};
    m_filter_arena = {};
    m_newline_offsets = {};
    m_newline_offsets_size = {};
    m_newline_offsets_capacity = {};
    m_newline_offsets_buf = {};
}

/** Release the newline-offsets buffer, the filter arena and the state
 * stack through the stack's callbacks. */
void Parser::_free()
{
    if(m_newline_offsets)
    {
        _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
        m_newline_offsets = nullptr;
        m_newline_offsets_size = 0u;
        m_newline_offsets_capacity = 0u;
        m_newline_offsets_buf = 0u;
    }
    if(m_filter_arena.len)
    {
        _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len);
        m_filter_arena = {};
    }
    m_stack._free();
}


//-----------------------------------------------------------------------------
/** Prepare for a new parse: restart the state stack with a single
 * state reset to the current file/root, clear pending tags and
 * anchors, and prepare the locations when that option is enabled. */
void Parser::_reset()
{
    _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() == 1);
    m_stack.clear();
    m_stack.push({});
    m_state = &m_stack.top();
    m_state->reset(m_file.str, m_root_id);

    m_key_tag_indentation = 0;
    m_key_tag2_indentation = 0;
    m_key_tag.clear();
    m_key_tag2.clear();
    m_val_tag_indentation = 0;
    m_val_tag.clear();
    m_key_anchor_was_before = false;
    m_key_anchor_indentation = 0;
    m_key_anchor.clear();
    m_val_anchor_indentation = 0;
    m_val_anchor.clear();

    if(m_options.locations())
    {
        _prepare_locations();
    }
}

//-----------------------------------------------------------------------------
/** Dump the context of the current position through @p dumpfn: the
 * "file:line:col:" prefix, up to 80 characters of the current source
 * line, and a "^~~~" marker under the part of the line still to be
 * processed. With RYML_DBG the state flags are printed as well. */
template<class DumpFn>
void Parser::_fmt_msg(DumpFn &&dumpfn) const
{
    auto const& lc = m_state->line_contents;
    csubstr contents = lc.stripped;
    if(contents.len)
    {
        // print the yaml src line
        size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col);
        if(m_file.len)
        {
            _parse_dump(dumpfn, "{}:", m_file);
            offs += m_file.len + 1;
        }
        _parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col);
        csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
        csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
        _parse_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
        // highlight the remaining portion of the previous line
        size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
        size_t lastcol = firstcol + lc.rem.len;
        for(size_t i = 0; i < offs + firstcol; ++i)
            dumpfn(" ");
        dumpfn("^");
        for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
            dumpfn("~");
        _parse_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
    }
    else
    {
        dumpfn("\n");
    }

#ifdef RYML_DBG
    // next line: print the state flags
    {
        char flagbuf_[64];
        _parse_dump(dumpfn, "top state: {}\n", _prfl(flagbuf_, m_state->flags));
    }
#endif
}


//-----------------------------------------------------------------------------
/** Format an error message (the user message followed by the context
 * from _fmt_msg()) into a stack buffer of RYML_ERRMSG_SIZE bytes and
 * pass it to the tree's error callback. The reported length is
 * clamped to the buffer size. */
template<class ...Args>
void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
{
    char errmsg[RYML_ERRMSG_SIZE];
    detail::_SubstrWriter writer(errmsg);
    auto dumpfn = [&writer](csubstr s){ writer.append(s); };
    _parse_dump(dumpfn, fmt, args...);
    writer.append('\n');
    _fmt_msg(dumpfn);
    size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
    m_tree->m_callbacks.m_error(errmsg, len, m_state->pos, m_tree->m_callbacks.m_user_data);
}

//-----------------------------------------------------------------------------
#ifdef RYML_DBG
/** Debug-only: print the formatted message and the context from
 * _fmt_msg() to stdout. */
template<class ...Args>
void Parser::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
{
    auto dumpfn = [](csubstr s){ fwrite(s.str, 1, s.len, stdout); };
    _parse_dump(dumpfn, fmt, args...);
    dumpfn("\n");
    _fmt_msg(dumpfn);
}
#endif

//-----------------------------------------------------------------------------
/** @return true when the current offset reached the end of the buffer */
bool Parser::_finished_file() const
{
    bool ret = m_state->pos.offset >= m_buf.len;
    if(ret)
    {
        _c4dbgp("finished file!!!");
    }
    return ret;
}

//-----------------------------------------------------------------------------
/** @return true when nothing remains to be processed in the current line */
bool Parser::_finished_line() const
{
    return m_state->line_contents.rem.empty();
}

//-----------------------------------------------------------------------------
/** Parse the YAML in @p buf into the node @p node_id of tree @p t.
 * @p file is stored as the file name (it is used in messages). The
 * buffer is consumed line by line until the end is reached. */
void Parser::parse_in_place(csubstr file, substr buf, Tree *t, size_t node_id)
{
    m_file = file;
    m_buf = buf;
    m_root_id = node_id;
    m_tree = t;
    _reset();
    while( ! _finished_file())
    {
        _scan_line();
        while( ! _finished_line())
            _handle_line();
        if(_finished_file())
            break; // it may have finished because of multiline blocks
        _line_ended();
    }
    _handle_finished_file();
}

//-----------------------------------------------------------------------------
/** Called once the whole buffer was consumed: ends the stream. */
void Parser::_handle_finished_file()
{
    _end_stream();
}

//-----------------------------------------------------------------------------
/** Process the next token(s) of the current line, dispatching on the
 * state flags (RSEQ / RMAP / RUNK, flow or block). When the selected
 * handler does not consume anything, _handle_top() is tried. */
void Parser::_handle_line()
{
    _c4dbgq("\n-----------");
    _c4dbgt("handling line={}, offset={}B", m_state->pos.line, m_state->pos.offset);
    _RYML_CB_ASSERT(m_stack.m_callbacks, ! m_state->line_contents.rem.empty());
    if(has_any(RSEQ))
    {
        if(has_any(FLOW))
        {
            if(_handle_seq_flow())
                return;
        }
        else
        {
            if(_handle_seq_blck())
                return;
        }
    }
    else if(has_any(RMAP))
    {
        if(has_any(FLOW))
        {
            if(_handle_map_flow())
                return;
        }
        else
        {
            if(_handle_map_blck())
                return;
        }
    }
    else if(has_any(RUNK))
    {
        if(_handle_unk())
            return;
    }

    if(_handle_top())
        return;
}


//-----------------------------------------------------------------------------
/** Handle the remainder of the line while the type of the current
 * node is still unknown (neither RSEQ nor RMAP is set), deciding from
 * the next token whether a seq, a map or a scalar follows.
 * @return true when the line was progressed or the state changed */
bool Parser::_handle_unk()
{
    _c4dbgp("handle_unk");

    csubstr rem = m_state->line_contents.rem;
    const bool start_as_child = (node(m_state) == nullptr);

    if(C4_UNLIKELY(has_any(NDOC)))
    {
        if(rem == "---" || rem.begins_with("--- "))
        {
            _start_new_doc(rem);
            return true;
        }
        auto trimmed = rem.triml(' ');
        if(trimmed == "---" || trimmed.begins_with("--- "))
        {
            _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len >= trimmed.len);
            _line_progressed(rem.len - trimmed.len);
            _start_new_doc(trimmed);
            _save_indentation();
            return true;
        }
        else if(trimmed.begins_with("..."))
        {
            _end_stream();
        }
        else if(trimmed.first_of("#%") == csubstr::npos) // neither a doc nor a tag
        {
            _c4dbgpf("starting implicit doc to accomodate unexpected tokens: '{}'", rem);
            size_t indref = m_state->indref;
            _push_level();
            _start_doc();
            _set_indentation(indref);
        }
        _RYML_CB_ASSERT(m_stack.m_callbacks, !trimmed.empty());
    }

    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
    if(m_state->indref > 0)
    {
        csubstr ws = rem.left_of(rem.first_not_of(' '));
        if(m_state->indref <= ws.len)
        {
            _c4dbgpf("skipping base indentation of {}", m_state->indref);
            _line_progressed(m_state->indref);
            rem = rem.sub(m_state->indref);
        }
    }

    if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
    {
        _c4dbgpf("it's a seq (as_child={})", start_as_child);
        _move_key_anchor_to_val_anchor();
        _move_key_tag_to_val_tag();
        _push_level();
        _start_seq(start_as_child);
        _save_indentation();
        _line_progressed(2);
        return true;
    }
    else if(rem == '-')
    {
        _c4dbgpf("it's a seq (as_child={})", start_as_child);
        _move_key_anchor_to_val_anchor();
        _move_key_tag_to_val_tag();
        _push_level();
        _start_seq(start_as_child);
        _save_indentation();
        _line_progressed(1);
        return true;
    }
    else if(rem.begins_with('['))
    {
        _c4dbgpf("it's a seq, flow (as_child={})", start_as_child);
        _move_key_anchor_to_val_anchor();
        _move_key_tag_to_val_tag();
        _push_level(/*explicit flow*/true);
        _start_seq(start_as_child);
        add_flags(FLOW);
        _line_progressed(1);
        return true;
    }
    else if(rem.begins_with('{'))
    {
        _c4dbgpf("it's a map, flow (as_child={})", start_as_child);
        _move_key_anchor_to_val_anchor();
        _move_key_tag_to_val_tag();
        _push_level(/*explicit flow*/true);
        _start_map(start_as_child);
        addrem_flags(FLOW|RKEY, RVAL);
        _line_progressed(1);
        return true;
    }
    else if(rem.begins_with("? "))
    {
        _c4dbgpf("it's a map (as_child={}) + this key is complex", start_as_child);
        _move_key_anchor_to_val_anchor();
        _move_key_tag_to_val_tag();
        _push_level();
        _start_map(start_as_child);
        addrem_flags(RKEY|QMRK, RVAL);
        _save_indentation();
        _line_progressed(2);
        return true;
    }
    else if(rem.begins_with(": ") && !has_any(SSCL))
    {
        _c4dbgp("it's a map with an empty key");
        _move_key_anchor_to_val_anchor();
        _move_key_tag_to_val_tag();
        _push_level();
        _start_map(start_as_child);
        _store_scalar_null(rem.str);
        addrem_flags(RVAL, RKEY);
        _save_indentation();
        _line_progressed(2);
        return true;
    }
    else if(rem == ':' && !has_any(SSCL))
    {
        _c4dbgp("it's a map with an empty key");
        _move_key_anchor_to_val_anchor();
        _move_key_tag_to_val_tag();
        _push_level();
        _start_map(start_as_child);
        _store_scalar_null(rem.str);
        addrem_flags(RVAL, RKEY);
        _save_indentation();
        _line_progressed(1);
        return true;
    }
    else if(_handle_types())
    {
        return true;
    }
    else if(!rem.begins_with('*') && _handle_key_anchors_and_refs())
    {
        return true;
    }
    else if(has_any(SSCL))
    {
        _c4dbgpf("there's a stored scalar: '{}'", m_state->scalar);

        csubstr saved_scalar;
        bool is_quoted = false;
        if(_scan_scalar_unk(&saved_scalar, &is_quoted))
        {
            rem = m_state->line_contents.rem;
            _c4dbgpf("... and there's also a scalar next! '{}'", saved_scalar);
            if(rem.begins_with_any(" \t"))
            {
                size_t n = rem.first_not_of(" \t");
                if(n == npos)
                    n = rem.len; // the rest of the line is just whitespace
                _c4dbgpf("skipping {} spaces/tabs", n);
                rem = rem.sub(n);
                _line_progressed(n);
            }
        }

        _c4dbgpf("rem='{}'", rem);

        if(rem.begins_with(", "))
        {
            _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child);
            _start_seq(start_as_child);
            add_flags(FLOW);
            _append_val(_consume_scalar());
            _line_progressed(2);
        }
        else if(rem.begins_with(','))
        {
            _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child);
            _start_seq(start_as_child);
            add_flags(FLOW);
            _append_val(_consume_scalar());
            _line_progressed(1);
        }
        else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
        {
            _c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child);
            _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair
            _line_progressed(2);
        }
        else if(rem == ":" || rem.begins_with(":\"") || rem.begins_with(":'"))
        {
            if(rem == ":") { _c4dbgpf("got a ':' -- it's a map (as_child={})", start_as_child); }
            else { _c4dbgpf("got a '{}' -- it's a map (as_child={})", rem.first(2), start_as_child); }
            _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair
            _line_progressed(1); // advance only 1
        }
        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
        else if(rem.begins_with('}'))
        {
            if(!has_all(RMAP|FLOW))
            {
                _c4err("invalid token: not reading a map");
            }
            if(!has_all(SSCL))
            {
                _c4err("no scalar stored");
            }
            _append_key_val(saved_scalar, is_quoted);
            _stop_map();
            _line_progressed(1);
            saved_scalar.clear();
            is_quoted = false;
        }
        #endif
        else if(rem.begins_with("..."))
        {
            _c4dbgp("got stream end '...'");
            _end_stream();
            _line_progressed(3);
        }
        else if(rem.begins_with('#'))
        {
            _c4dbgpf("it's a comment: '{}'", rem);
            _scan_comment();
            return true;
        }
        else if(_handle_key_anchors_and_refs())
        {
            return true;
        }
        else if(rem.begins_with(" ") || rem.begins_with("\t"))
        {
            size_t n = rem.first_not_of(" \t");
            if(n == npos)
                n = rem.len;
            _c4dbgpf("has {} spaces/tabs, skip...", n);
            _line_progressed(n);
            return true;
        }
        else if(rem.empty())
        {
            // nothing to do
        }
        else if(rem == "---" || rem.begins_with("--- "))
        {
            _c4dbgp("caught ---: starting doc");
            _start_new_doc(rem);
            return true;
        }
        else if(rem.begins_with('%'))
        {
            _c4dbgp("caught a directive: ignoring...");
            _line_progressed(rem.len);
            return true;
        }
        else
        {
            _c4err("parse error");
        }

        if(is_quoted || (! saved_scalar.empty()))
        {
            _store_scalar(saved_scalar, is_quoted);
        }

        return true;
    }
    else
    {
        _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL));
        csubstr scalar;
        size_t indentation = m_state->line_contents.indentation; // save
        bool is_quoted;
        if(_scan_scalar_unk(&scalar, &is_quoted))
        {
            _c4dbgpf("got a {} scalar", is_quoted ? "quoted" : "");
            rem = m_state->line_contents.rem;
            {
                size_t first = rem.first_not_of(" \t");
                if(first && first != npos)
                {
                    _c4dbgpf("skip {} whitespace characters", first);
                    _line_progressed(first);
                    rem = rem.sub(first);
                }
            }
            _store_scalar(scalar, is_quoted);
            if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
            {
                _c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child);
                _push_level();
                _start_map(start_as_child); // wait for the val scalar to append the key-val pair
                _set_indentation(indentation);
                _line_progressed(2); // call this AFTER saving the indentation
            }
            else if(rem.begins_with(':'))
            {
                _c4dbgpf("got a ':' next -- it's a map (as_child={})", start_as_child);
                _push_level();
                _start_map(start_as_child); // wait for the val scalar to append the key-val pair
                _set_indentation(indentation);
                _line_progressed(1); // call this AFTER saving the indentation
            }
            else
            {
                // we still don't know whether it's a seq or a map
                // so just store the scalar
            }
            return true;
        }
        else if(rem.begins_with_any(" \t"))
        {
            csubstr ws = rem.left_of(rem.first_not_of(" \t"));
            rem = rem.right_of(ws);
            if(has_all(RTOP) && rem.begins_with("---"))
            {
                _c4dbgp("there's a doc starting, and it's indented");
                _set_indentation(ws.len);
            }
            _c4dbgpf("skipping {} spaces/tabs", ws.len);
            _line_progressed(ws.len);
            return true;
        }
    }

    return false;
}


//-----------------------------------------------------------------------------
/** Skip the leading run of the character @p c in the current line.
 * The remainder of the line must begin with @p c. */
C4_ALWAYS_INLINE void Parser::_skipchars(char c)
{
    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c));
    size_t pos = m_state->line_contents.rem.first_not_of(c);
    if(pos == npos)
        pos = m_state->line_contents.rem.len; // maybe the line is just whitespace
    _c4dbgpf("skip {} '{}'", pos, c);
    _line_progressed(pos);
}

/** Skip the leading run of any of the characters in @p chars in the
 * current line. The remainder must begin with one of them. */
template<size_t N>
C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N])
{
    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars));
    size_t pos = m_state->line_contents.rem.first_not_of(chars);
    if(pos == npos)
        pos = m_state->line_contents.rem.len; // maybe the line is just whitespace
    _c4dbgpf("skip {} characters", pos);
    _line_progressed(pos);
}


//-----------------------------------------------------------------------------
/** Handle the next token inside a flow sequence, ie inside [].
 * With RVAL set a value (or a token opening one) is expected; with
 * RNXT set a ',' (or the ':' of an implicit map) is expected.
 * @return true (every path either consumes input or reports an error) */
bool Parser::_handle_seq_flow()
{
    _c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level);
    csubstr rem = m_state->line_contents.rem;

    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW));

    if(rem.begins_with(' '))
    {
        // with explicit flow, indentation does not matter
        _c4dbgp("starts with spaces");
        _skipchars(' ');
        return true;
    }
    _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t'))
    {
        _c4dbgp("starts with tabs");
        _skipchars('\t');
        return true;
    })
    else if(rem.begins_with('#'))
    {
        _c4dbgp("it's a comment");
        rem = _scan_comment(); // also progresses the line
        return true;
    }
    else if(rem.begins_with(']'))
    {
        _c4dbgp("end the sequence");
        _pop_level();
        _line_progressed(1);
        if(has_all(RSEQIMAP))
        {
            _stop_seqimap();
            _pop_level();
        }
        return true;
    }

    if(has_any(RVAL))
    {
        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
        bool is_quoted;
        if(_scan_scalar_seq_flow(&rem, &is_quoted))
        {
            _c4dbgp("it's a scalar");
            addrem_flags(RNXT, RVAL);
            _append_val(rem, is_quoted);
            return true;
        }
        else if(rem.begins_with('['))
        {
            _c4dbgp("val is a child seq");
            addrem_flags(RNXT, RVAL); // before _push_level!
            _push_level(/*explicit flow*/true);
            _start_seq();
            add_flags(FLOW);
            _line_progressed(1);
            return true;
        }
        else if(rem.begins_with('{'))
        {
            _c4dbgp("val is a child map");
            addrem_flags(RNXT, RVAL); // before _push_level!
            _push_level(/*explicit flow*/true);
            _start_map();
            addrem_flags(FLOW|RKEY, RVAL);
            _line_progressed(1);
            return true;
        }
        else if(rem == ':')
        {
            _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id);
            _start_seqimap();
            _line_progressed(1);
            return true;
        }
        else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
        {
            _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
            _start_seqimap();
            _line_progressed(2);
            return true;
        }
        else if(rem.begins_with("? "))
        {
            _c4dbgpf("found '? ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
            _start_seqimap();
            _line_progressed(2);
            _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(SSCL) && m_state->scalar == "");
            addrem_flags(QMRK|RKEY, RVAL|SSCL);
            return true;
        }
        else if(_handle_types())
        {
            return true;
        }
        else if(_handle_val_anchors_and_refs())
        {
            return true;
        }
        else if(rem.begins_with(", "))
        {
            _c4dbgp("found ',' -- the value was null");
            _append_val_null(rem.str - 1);
            _line_progressed(2);
            return true;
        }
        else if(rem.begins_with(','))
        {
            _c4dbgp("found ',' -- the value was null");
            _append_val_null(rem.str - 1);
            _line_progressed(1);
            return true;
        }
        else if(rem.begins_with('\t'))
        {
            _skipchars('\t');
            return true;
        }
        else
        {
            _c4err("parse error");
        }
    }
    else if(has_any(RNXT))
    {
        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
        if(rem.begins_with(", "))
        {
            _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
            _c4dbgp("seq: expect next val");
            addrem_flags(RVAL, RNXT);
            _line_progressed(2);
            return true;
        }
        else if(rem.begins_with(','))
        {
            _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
            _c4dbgp("seq: expect next val");
            addrem_flags(RVAL, RNXT);
            _line_progressed(1);
            return true;
        }
        else if(rem == ':')
        {
            _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id);
            _start_seqimap();
            _line_progressed(1);
            return true;
        }
        else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
        {
            _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
            _start_seqimap();
            _line_progressed(2);
            return true;
        }
        else
        {
            _c4err("was expecting a comma");
        }
    }
    else
    {
        _c4err("internal error");
    }

    return true;
}

//-----------------------------------------------------------------------------
/** Handle the next token inside a block sequence (entries introduced
 * by '-'). With RNXT set the next entry marker is expected; with RVAL
 * set the value of the current entry is expected.
 * @return true when the line was progressed or the state changed */
bool Parser::_handle_seq_blck()
{
    _c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, m_state->level);
    csubstr rem = m_state->line_contents.rem;

    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ));
    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));

    if(rem.begins_with('#'))
    {
        _c4dbgp("it's a comment");
        rem = _scan_comment();
        return true;
    }
    if(has_any(RNXT))
    {
        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));

        if(_handle_indentation())
            return true;

        if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
        {
            _c4dbgp("expect another val");
            addrem_flags(RVAL, RNXT);
            _line_progressed(2);
            return true;
        }
        else if(rem == '-')
        {
            _c4dbgp("expect another val");
            addrem_flags(RVAL, RNXT);
            _line_progressed(1);
            return true;
        }
        else if(rem.begins_with_any(" \t"))
        {
            _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin());
            _skipchars(" \t");
            return true;
        }
        else if(rem.begins_with("..."))
        {
            _c4dbgp("got stream end '...'");
            _end_stream();
            _line_progressed(3);
            return true;
        }
        else if(rem.begins_with("---"))
        {
            _c4dbgp("got document start '---'");
            _start_new_doc(rem);
            return true;
        }
        else
        {
            _c4err("parse error");
        }
    }
    else if(has_any(RVAL))
    {
        // there can be empty values
        if(_handle_indentation())
            return true;

        csubstr s;
        bool is_quoted;
        if(_scan_scalar_seq_blck(&s, &is_quoted)) // this also progresses the line
        {
            _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");

            rem = m_state->line_contents.rem;
            if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' ')))
            {
                _c4dbgp("skipping whitespace...");
                size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
                if(skip == csubstr::npos)
                    skip = rem.len; // maybe the line is just whitespace
                _line_progressed(skip);
                rem = rem.sub(skip);
            }

            _c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem);
            if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))))
            {
                _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope");
                if(m_key_anchor.empty())
                    _move_val_anchor_to_key_anchor();
                if(m_key_tag.empty())
                    _move_val_tag_to_key_tag();
                addrem_flags(RNXT, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT
                _push_level();
                _start_map();
                _store_scalar(s, is_quoted);
                if( ! _maybe_set_indentation_from_anchor_or_tag())
                {
                    _c4dbgpf("set indentation from scalar: {}", m_state->scalar_col);
                    _set_indentation(m_state->scalar_col); // this is the column where the scalar starts
                }
                _move_key_tag2_to_key_tag();
                addrem_flags(RVAL, RKEY);
                _line_progressed(1);
            }
            else
            {
                _c4dbgp("appending val to current seq");
                _append_val(s, is_quoted);
                addrem_flags(RNXT, RVAL);
            }
            return true;
        }
        else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
        {
            if(_rval_dash_start_or_continue_seq())
                _line_progressed(2);
            return true;
        }
        else if(rem == '-')
        {
            if(_rval_dash_start_or_continue_seq())
                _line_progressed(1);
            return true;
        }
        else if(rem.begins_with('['))
        {
            _c4dbgp("val is a child seq, flow");
            addrem_flags(RNXT, RVAL); // before _push_level!
            _push_level(/*explicit flow*/true);
            _start_seq();
            add_flags(FLOW);
            _line_progressed(1);
            return true;
        }
        else if(rem.begins_with('{'))
        {
            _c4dbgp("val is a child map, flow");
            addrem_flags(RNXT, RVAL); // before _push_level!
            _push_level(/*explicit flow*/true);
            _start_map();
            addrem_flags(FLOW|RKEY, RVAL);
            _line_progressed(1);
            return true;
        }
        else if(rem.begins_with("? "))
        {
            _c4dbgp("val is a child map + this key is complex");
            addrem_flags(RNXT, RVAL); // before _push_level!
            _push_level();
            _start_map();
            addrem_flags(QMRK|RKEY, RVAL);
            _save_indentation();
            _line_progressed(2);
            return true;
        }
        else if(rem.begins_with(' '))
        {
            csubstr spc = rem.left_of(rem.first_not_of(' '));
            if(_at_line_begin())
            {
                _c4dbgpf("skipping value indentation: {} spaces", spc.len);
                _line_progressed(spc.len);
                return true;
            }
            else
            {
                _c4dbgpf("skipping {} spaces", spc.len);
                _line_progressed(spc.len);
                return true;
            }
        }
        else if(_handle_types())
        {
            return true;
        }
        else if(_handle_val_anchors_and_refs())
        {
            return true;
        }
        /* pathological case:
         * - &key : val
         * - &key :
         * - : val
         */
        else if((!has_all(SSCL)) &&
                (rem.begins_with(": ") || rem.left_of(rem.find("#")).trimr("\t") == ":"))
        {
            if(!m_val_anchor.empty() || !m_val_tag.empty())
            {
                _c4dbgp("val is a child map + this key is empty, with anchors or tags");
                addrem_flags(RNXT, RVAL); // before _push_level!
                _move_val_tag_to_key_tag();
                _move_val_anchor_to_key_anchor();
                _push_level();
                _start_map();
                _store_scalar_null(rem.str);
                addrem_flags(RVAL, RKEY);
                RYML_CHECK(_maybe_set_indentation_from_anchor_or_tag()); // one of them must exist
                _line_progressed(rem.begins_with(": ") ? 2u : 1u);
                return true;
            }
            else
            {
                _c4dbgp("val is a child map + this key is empty, no anchors or tags");
                addrem_flags(RNXT, RVAL); // before _push_level!
                size_t ind = m_state->indref;
                _push_level();
                _start_map();
                _store_scalar_null(rem.str);
                addrem_flags(RVAL, RKEY);
                _c4dbgpf("set indentation from map anchor: {}", ind + 2);
                _set_indentation(ind + 2); // this is the column where the map starts
                _line_progressed(rem.begins_with(": ") ? 2u : 1u);
                return true;
            }
        }
        else
        {
            _c4err("parse error");
        }
    }

    return false;
}

//-----------------------------------------------------------------------------

/** Called when a '-' is found while a block-sequence value is
 * expected. When the dash is at the reference indentation, the
 * previous value was empty: a null value is appended and the dash is
 * left to be consumed as the next entry. Otherwise the dash opens a
 * nested sequence.
 * @return true when a nested sequence was started (the caller must
 * then consume the dash), false when a null value was appended */
bool Parser::_rval_dash_start_or_continue_seq()
{
    size_t ind = m_state->line_contents.current_col();
    _RYML_CB_ASSERT(m_stack.m_callbacks, ind >= m_state->indref);
    size_t delta_ind = ind - m_state->indref;
    if( ! delta_ind)
    {
        _c4dbgp("prev val was empty");
        addrem_flags(RNXT, RVAL);
        _append_val_null(&m_state->line_contents.full[ind]);
        return false;
    }
    _c4dbgp("val is a nested seq, indented");
    addrem_flags(RNXT, RVAL); // before _push_level!
    _push_level();
    _start_seq();
    _save_indentation();
    return true;
}

//-----------------------------------------------------------------------------
bool Parser::_handle_map_flow()
{
    // explicit flow, ie, inside {}, separated by commas
    _c4dbgpf("handle_map_flow: node_id={} level={}", m_state->node_id, m_state->level);
    csubstr rem = m_state->line_contents.rem;

    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW));

    if(rem.begins_with(' '))
    {
        // with explicit flow, indentation does not matter
        _c4dbgp("starts with spaces");
        _skipchars(' ');
        return true;
    }
    _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t'))
    {
        // with explicit flow, indentation does not matter
        _c4dbgp("starts with tabs");
        _skipchars('\t');
        return true;
    })
    else
if(rem.begins_with('#'))1321{1322_c4dbgp("it's a comment");1323rem = _scan_comment(); // also progresses the line1324return true;1325}1326else if(rem.begins_with('}'))1327{1328_c4dbgp("end the map");1329if(has_all(SSCL))1330{1331_c4dbgp("the last val was null");1332_append_key_val_null(rem.str - 1);1333rem_flags(RVAL);1334}1335_pop_level();1336_line_progressed(1);1337if(has_all(RSEQIMAP))1338{1339_c4dbgp("stopping implicitly nested 1x map");1340_stop_seqimap();1341_pop_level();1342}1343return true;1344}13451346if(has_any(RNXT))1347{1348_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));1349_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));1350_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RSEQIMAP));13511352if(rem.begins_with(", "))1353{1354_c4dbgp("seq: expect next keyval");1355addrem_flags(RKEY, RNXT);1356_line_progressed(2);1357return true;1358}1359else if(rem.begins_with(','))1360{1361_c4dbgp("seq: expect next keyval");1362addrem_flags(RKEY, RNXT);1363_line_progressed(1);1364return true;1365}1366else1367{1368_c4err("parse error");1369}1370}1371else if(has_any(RKEY))1372{1373_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));1374_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));13751376bool is_quoted;1377if(has_none(SSCL) && _scan_scalar_map_flow(&rem, &is_quoted))1378{1379_c4dbgp("it's a scalar");1380_store_scalar(rem, is_quoted);1381rem = m_state->line_contents.rem;1382csubstr trimmed = rem.triml(" \t");1383if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))))1384{1385_RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str);1386size_t num = static_cast<size_t>(trimmed.str - rem.str);1387_c4dbgpf("trimming {} whitespace after the scalar: '{}' --> '{}'", num, rem, rem.sub(num));1388rem = rem.sub(num);1389_line_progressed(num);1390}1391}13921393if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))1394{1395_c4dbgp("wait for val");1396addrem_flags(RVAL, 
RKEY|QMRK);1397_line_progressed(2);1398if(!has_all(SSCL))1399{1400_c4dbgp("no key was found, defaulting to empty key ''");1401_store_scalar_null(rem.str);1402}1403return true;1404}1405else if(rem == ':')1406{1407_c4dbgp("wait for val");1408addrem_flags(RVAL, RKEY|QMRK);1409_line_progressed(1);1410if(!has_all(SSCL))1411{1412_c4dbgp("no key was found, defaulting to empty key ''");1413_store_scalar_null(rem.str);1414}1415return true;1416}1417else if(rem.begins_with('?'))1418{1419_c4dbgp("complex key");1420add_flags(QMRK);1421_line_progressed(1);1422return true;1423}1424else if(rem.begins_with(','))1425{1426_c4dbgp("prev scalar was a key with null value");1427_append_key_val_null(rem.str - 1);1428_line_progressed(1);1429return true;1430}1431else if(rem.begins_with('}'))1432{1433_c4dbgp("map terminates after a key...");1434_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL));1435_c4dbgp("the last val was null");1436_append_key_val_null(rem.str - 1);1437rem_flags(RVAL);1438if(has_all(RSEQIMAP))1439{1440_c4dbgp("stopping implicitly nested 1x map");1441_stop_seqimap();1442_pop_level();1443}1444_pop_level();1445_line_progressed(1);1446return true;1447}1448else if(_handle_types())1449{1450return true;1451}1452else if(_handle_key_anchors_and_refs())1453{1454return true;1455}1456else if(rem == "")1457{1458return true;1459}1460else1461{1462size_t pos = rem.first_not_of(" \t");1463if(pos == csubstr::npos)1464pos = 0;1465rem = rem.sub(pos);1466if(rem.begins_with(':'))1467{1468_c4dbgp("wait for val");1469addrem_flags(RVAL, RKEY|QMRK);1470_line_progressed(pos + 1);1471if(!has_all(SSCL))1472{1473_c4dbgp("no key was found, defaulting to empty key ''");1474_store_scalar_null(rem.str);1475}1476return true;1477}1478else if(rem.begins_with('#'))1479{1480_c4dbgp("it's a comment");1481_line_progressed(pos);1482rem = _scan_comment(); // also progresses the line1483return true;1484}1485else1486{1487_c4err("parse error");1488}1489}1490}1491else 
if(has_any(RVAL))1492{1493_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));1494_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));1495_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL));1496bool is_quoted;1497if(_scan_scalar_map_flow(&rem, &is_quoted))1498{1499_c4dbgp("it's a scalar");1500addrem_flags(RNXT, RVAL|RKEY);1501_append_key_val(rem, is_quoted);1502if(has_all(RSEQIMAP))1503{1504_c4dbgp("stopping implicitly nested 1x map");1505_stop_seqimap();1506_pop_level();1507}1508return true;1509}1510else if(rem.begins_with('['))1511{1512_c4dbgp("val is a child seq");1513addrem_flags(RNXT, RVAL|RKEY); // before _push_level!1514_push_level(/*explicit flow*/true);1515_move_scalar_from_top();1516_start_seq();1517add_flags(FLOW);1518_line_progressed(1);1519return true;1520}1521else if(rem.begins_with('{'))1522{1523_c4dbgp("val is a child map");1524addrem_flags(RNXT, RVAL|RKEY); // before _push_level!1525_push_level(/*explicit flow*/true);1526_move_scalar_from_top();1527_start_map();1528addrem_flags(FLOW|RKEY, RNXT|RVAL);1529_line_progressed(1);1530return true;1531}1532else if(_handle_types())1533{1534return true;1535}1536else if(_handle_val_anchors_and_refs())1537{1538return true;1539}1540else if(rem.begins_with(','))1541{1542_c4dbgp("appending empty val");1543_append_key_val_null(rem.str - 1);1544addrem_flags(RKEY, RVAL);1545_line_progressed(1);1546if(has_any(RSEQIMAP))1547{1548_c4dbgp("stopping implicitly nested 1x map");1549_stop_seqimap();1550_pop_level();1551}1552return true;1553}1554else if(has_any(RSEQIMAP) && rem.begins_with(']'))1555{1556_c4dbgp("stopping implicitly nested 1x map");1557if(has_any(SSCL))1558{1559_append_key_val_null(rem.str - 1);1560}1561_stop_seqimap();1562_pop_level();1563return true;1564}1565else1566{1567_c4err("parse error");1568}1569}1570else1571{1572_c4err("internal error");1573}15741575return false;1576}15771578//-----------------------------------------------------------------------------1579bool 
Parser::_handle_map_blck()1580{1581_c4dbgpf("handle_map_blck: node_id={} level={}", m_state->node_id, m_state->level);1582csubstr rem = m_state->line_contents.rem;15831584_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP));1585_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));15861587if(rem.begins_with('#'))1588{1589_c4dbgp("it's a comment");1590rem = _scan_comment();1591return true;1592}15931594if(has_any(RNXT))1595{1596_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));1597_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));1598// actually, we don't need RNXT in indent-based maps.1599addrem_flags(RKEY, RNXT);1600}16011602if(_handle_indentation())1603{1604_c4dbgp("indentation token");1605return true;1606}16071608if(has_any(RKEY))1609{1610_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));1611_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));16121613_c4dbgp("RMAP|RKEY read scalar?");1614bool is_quoted;1615if(_scan_scalar_map_blck(&rem, &is_quoted)) // this also progresses the line1616{1617_c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");1618if(has_all(QMRK|SSCL))1619{1620_c4dbgpf("current key is QMRK; SSCL is set. so take store scalar='{}' as key and add an empty val", m_state->scalar);1621_append_key_val_null(rem.str - 1);1622}1623_store_scalar(rem, is_quoted);1624if(has_all(QMRK|RSET))1625{1626_c4dbgp("it's a complex key, so use null value '~'");1627_append_key_val_null(rem.str);1628}1629rem = m_state->line_contents.rem;16301631if(rem.begins_with(':'))1632{1633_c4dbgp("wait for val");1634addrem_flags(RVAL, RKEY|QMRK);1635_line_progressed(1);1636rem = m_state->line_contents.rem;1637if(rem.begins_with_any(" \t"))1638{1639_RYML_CB_ASSERT(m_stack.m_callbacks, ! 
_at_line_begin());1640rem = rem.left_of(rem.first_not_of(" \t"));1641_c4dbgpf("skip {} spaces/tabs", rem.len);1642_line_progressed(rem.len);1643}1644}1645return true;1646}1647else if(rem.begins_with_any(" \t"))1648{1649size_t pos = rem.first_not_of(" \t");1650if(pos == npos)1651pos = rem.len;1652_c4dbgpf("skip {} spaces/tabs", pos);1653_line_progressed(pos);1654return true;1655}1656else if(rem == '?' || rem.begins_with("? "))1657{1658_c4dbgp("it's a complex key");1659_line_progressed(rem.begins_with("? ") ? 2u : 1u);1660if(has_any(SSCL))1661_append_key_val_null(rem.str - 1);1662add_flags(QMRK);1663return true;1664}1665else if(has_all(QMRK) && rem.begins_with(':'))1666{1667_c4dbgp("complex key finished");1668if(!has_any(SSCL))1669_store_scalar_null(rem.str);1670addrem_flags(RVAL, RKEY|QMRK);1671_line_progressed(1);1672rem = m_state->line_contents.rem;1673if(rem.begins_with(' '))1674{1675_RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin());1676_skipchars(' ');1677}1678return true;1679}1680else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))1681{1682_c4dbgp("key finished");1683if(!has_all(SSCL))1684{1685_c4dbgp("key was empty...");1686_store_scalar_null(rem.str);1687rem_flags(QMRK);1688}1689addrem_flags(RVAL, RKEY);1690_line_progressed(rem == ':' ? 
1 : 2);1691return true;1692}1693else if(rem.begins_with("..."))1694{1695_c4dbgp("end current document");1696_end_stream();1697_line_progressed(3);1698return true;1699}1700else if(rem.begins_with("---"))1701{1702_c4dbgp("start new document '---'");1703_start_new_doc(rem);1704return true;1705}1706else if(_handle_types())1707{1708return true;1709}1710else if(_handle_key_anchors_and_refs())1711{1712return true;1713}1714else1715{1716_c4err("parse error");1717}1718}1719else if(has_any(RVAL))1720{1721_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));1722_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));17231724_c4dbgp("RMAP|RVAL read scalar?");1725csubstr s;1726bool is_quoted;1727if(_scan_scalar_map_blck(&s, &is_quoted)) // this also progresses the line1728{1729_c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");17301731rem = m_state->line_contents.rem;17321733if(rem.begins_with(": "))1734{1735_c4dbgp("actually, the scalar is the first key of a map");1736addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT1737_push_level();1738_move_scalar_from_top();1739_move_val_anchor_to_key_anchor();1740_start_map();1741_save_indentation(m_state->scalar_col);1742addrem_flags(RVAL, RKEY);1743_line_progressed(2);1744}1745else if(rem.begins_with(':'))1746{1747_c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope");1748addrem_flags(RKEY, RVAL); // before _push_level! 
This prepares the current level for popping by setting it to RNXT1749_push_level();1750_move_scalar_from_top();1751_move_val_anchor_to_key_anchor();1752_start_map();1753_save_indentation(/*behind*/s.len);1754addrem_flags(RVAL, RKEY);1755_line_progressed(1);1756}1757else1758{1759_c4dbgp("appending keyval to current map");1760_append_key_val(s, is_quoted);1761addrem_flags(RKEY, RVAL);1762}1763return true;1764}1765else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))1766{1767_c4dbgp("val is a nested seq, indented");1768addrem_flags(RKEY, RVAL); // before _push_level!1769_push_level();1770_move_scalar_from_top();1771_start_seq();1772_save_indentation();1773_line_progressed(2);1774return true;1775}1776else if(rem == '-')1777{1778_c4dbgp("maybe a seq. start unknown, indented");1779_start_unk();1780_save_indentation();1781_line_progressed(1);1782return true;1783}1784else if(rem.begins_with('['))1785{1786_c4dbgp("val is a child seq, flow");1787addrem_flags(RKEY, RVAL); // before _push_level!1788_push_level(/*explicit flow*/true);1789_move_scalar_from_top();1790_start_seq();1791add_flags(FLOW);1792_line_progressed(1);1793return true;1794}1795else if(rem.begins_with('{'))1796{1797_c4dbgp("val is a child map, flow");1798addrem_flags(RKEY, RVAL); // before _push_level!1799_push_level(/*explicit flow*/true);1800_move_scalar_from_top();1801_start_map();1802addrem_flags(FLOW|RKEY, RVAL);1803_line_progressed(1);1804return true;1805}1806else if(rem.begins_with(' '))1807{1808csubstr spc = rem.left_of(rem.first_not_of(' '));1809if(_at_line_begin())1810{1811_c4dbgpf("skipping value indentation: {} spaces", spc.len);1812_line_progressed(spc.len);1813return true;1814}1815else1816{1817_c4dbgpf("skipping {} spaces", spc.len);1818_line_progressed(spc.len);1819return true;1820}1821}1822else if(_handle_types())1823{1824return true;1825}1826else if(_handle_val_anchors_and_refs())1827{1828return true;1829}1830else if(rem.begins_with("--- ") || rem == "---" || 
rem.begins_with("---\t"))1831{1832_start_new_doc(rem);1833return true;1834}1835else if(rem.begins_with("..."))1836{1837_c4dbgp("end current document");1838_end_stream();1839_line_progressed(3);1840return true;1841}1842else1843{1844_c4err("parse error");1845}1846}1847else1848{1849_c4err("internal error");1850}18511852return false;1853}185418551856//-----------------------------------------------------------------------------1857bool Parser::_handle_top()1858{1859_c4dbgp("handle_top");1860csubstr rem = m_state->line_contents.rem;18611862if(rem.begins_with('#'))1863{1864_c4dbgp("a comment line");1865_scan_comment();1866return true;1867}18681869csubstr trimmed = rem.triml(' ');18701871if(trimmed.begins_with('%'))1872{1873_handle_directive(trimmed);1874_line_progressed(rem.len);1875return true;1876}1877else if(trimmed.begins_with("--- ") || trimmed == "---" || trimmed.begins_with("---\t"))1878{1879_start_new_doc(rem);1880if(trimmed.len < rem.len)1881{1882_line_progressed(rem.len - trimmed.len);1883_save_indentation();1884}1885return true;1886}1887else if(trimmed.begins_with("..."))1888{1889_c4dbgp("end current document");1890_end_stream();1891if(trimmed.len < rem.len)1892{1893_line_progressed(rem.len - trimmed.len);1894}1895_line_progressed(3);1896return true;1897}1898else1899{1900_c4err("parse error");1901}19021903return false;1904}190519061907//-----------------------------------------------------------------------------19081909bool Parser::_handle_key_anchors_and_refs()1910{1911_RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RVAL));1912const csubstr rem = m_state->line_contents.rem;1913if(rem.begins_with('&'))1914{1915_c4dbgp("found a key anchor!!!");1916if(has_all(QMRK|SSCL))1917{1918_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY));1919_c4dbgp("there is a stored key, so this anchor is for the next element");1920_append_key_val_null(rem.str - 1);1921rem_flags(QMRK);1922return true;1923}1924csubstr anchor = rem.left_of(rem.first_of(' 
'));1925_line_progressed(anchor.len);1926anchor = anchor.sub(1); // skip the first character1927_move_key_anchor_to_val_anchor();1928_c4dbgpf("key anchor value: '{}'", anchor);1929m_key_anchor = anchor;1930m_key_anchor_indentation = m_state->line_contents.current_col(rem);1931return true;1932}1933else if(C4_UNLIKELY(rem.begins_with('*')))1934{1935_c4err("not implemented - this should have been catched elsewhere");1936C4_NEVER_REACH();1937return false;1938}1939return false;1940}19411942bool Parser::_handle_val_anchors_and_refs()1943{1944_RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RKEY));1945const csubstr rem = m_state->line_contents.rem;1946if(rem.begins_with('&'))1947{1948csubstr anchor = rem.left_of(rem.first_of(' '));1949_line_progressed(anchor.len);1950anchor = anchor.sub(1); // skip the first character1951_c4dbgpf("val: found an anchor: '{}', indentation={}!!!", anchor, m_state->line_contents.current_col(rem));1952if(m_val_anchor.empty())1953{1954_c4dbgpf("save val anchor: '{}'", anchor);1955m_val_anchor = anchor;1956m_val_anchor_indentation = m_state->line_contents.current_col(rem);1957}1958else1959{1960_c4dbgpf("there is a pending val anchor '{}'", m_val_anchor);1961if(m_tree->is_seq(m_state->node_id))1962{1963if(m_tree->has_children(m_state->node_id))1964{1965_c4dbgpf("current node={} is a seq, has {} children", m_state->node_id, m_tree->num_children(m_state->node_id));1966_c4dbgpf("... so take the new one as a key anchor '{}'", anchor);1967m_key_anchor = anchor;1968m_key_anchor_indentation = m_state->line_contents.current_col(rem);1969}1970else1971{1972_c4dbgpf("current node={} is a seq, has no children", m_state->node_id);1973if(m_tree->has_val_anchor(m_state->node_id))1974{1975_c4dbgpf("... node={} already has val anchor: '{}'", m_state->node_id, m_tree->val_anchor(m_state->node_id));1976_c4dbgpf("... 
so take the new one as a key anchor '{}'", anchor);1977m_key_anchor = anchor;1978m_key_anchor_indentation = m_state->line_contents.current_col(rem);1979}1980else1981{1982_c4dbgpf("... so set pending val anchor: '{}' on current node {}", m_val_anchor, m_state->node_id);1983m_tree->set_val_anchor(m_state->node_id, m_val_anchor);1984m_val_anchor = anchor;1985m_val_anchor_indentation = m_state->line_contents.current_col(rem);1986}1987}1988}1989}1990return true;1991}1992else if(C4_UNLIKELY(rem.begins_with('*')))1993{1994_c4err("not implemented - this should have been catched elsewhere");1995C4_NEVER_REACH();1996return false;1997}1998return false;1999}20002001void Parser::_move_key_anchor_to_val_anchor()2002{2003if(m_key_anchor.empty())2004return;2005_c4dbgpf("move current key anchor to val slot: key='{}' -> val='{}'", m_key_anchor, m_val_anchor);2006if(!m_val_anchor.empty())2007_c4err("triple-pending anchor");2008m_val_anchor = m_key_anchor;2009m_val_anchor_indentation = m_key_anchor_indentation;2010m_key_anchor = {};2011m_key_anchor_indentation = {};2012}20132014void Parser::_move_val_anchor_to_key_anchor()2015{2016if(m_val_anchor.empty())2017return;2018if(!_token_is_from_this_line(m_val_anchor))2019return;2020_c4dbgpf("move current val anchor to key slot: key='{}' <- val='{}'", m_key_anchor, m_val_anchor);2021if(!m_key_anchor.empty())2022_c4err("triple-pending anchor");2023m_key_anchor = m_val_anchor;2024m_key_anchor_indentation = m_val_anchor_indentation;2025m_val_anchor = {};2026m_val_anchor_indentation = {};2027}20282029void Parser::_move_key_tag_to_val_tag()2030{2031if(m_key_tag.empty())2032return;2033_c4dbgpf("move key tag to val tag: key='{}' -> val='{}'", m_key_tag, m_val_tag);2034m_val_tag = m_key_tag;2035m_val_tag_indentation = m_key_tag_indentation;2036m_key_tag.clear();2037m_key_tag_indentation = 0;2038}20392040void 
Parser::_move_val_tag_to_key_tag()2041{2042if(m_val_tag.empty())2043return;2044if(!_token_is_from_this_line(m_val_tag))2045return;2046_c4dbgpf("move val tag to key tag: key='{}' <- val='{}'", m_key_tag, m_val_tag);2047m_key_tag = m_val_tag;2048m_key_tag_indentation = m_val_tag_indentation;2049m_val_tag.clear();2050m_val_tag_indentation = 0;2051}20522053void Parser::_move_key_tag2_to_key_tag()2054{2055if(m_key_tag2.empty())2056return;2057_c4dbgpf("move key tag2 to key tag: key='{}' <- key2='{}'", m_key_tag, m_key_tag2);2058m_key_tag = m_key_tag2;2059m_key_tag_indentation = m_key_tag2_indentation;2060m_key_tag2.clear();2061m_key_tag2_indentation = 0;2062}206320642065//-----------------------------------------------------------------------------20662067bool Parser::_handle_types()2068{2069csubstr rem = m_state->line_contents.rem.triml(' ');2070csubstr t;20712072if(rem.begins_with("!!"))2073{2074_c4dbgp("begins with '!!'");2075t = rem.left_of(rem.first_of(" ,"));2076_RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2);2077//t = t.sub(2);2078if(t == "!!set")2079add_flags(RSET);2080}2081else if(rem.begins_with("!<"))2082{2083_c4dbgp("begins with '!<'");2084t = rem.left_of(rem.first_of('>'), true);2085_RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2);2086//t = t.sub(2, t.len-1);2087}2088else if(rem.begins_with("!h!"))2089{2090_c4dbgp("begins with '!h!'");2091t = rem.left_of(rem.first_of(' '));2092_RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 3);2093//t = t.sub(3);2094}2095else if(rem.begins_with('!'))2096{2097_c4dbgp("begins with '!'");2098t = rem.left_of(rem.first_of(' '));2099_RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1);2100//t = t.sub(1);2101}21022103if(t.empty())2104return false;21052106if(has_all(QMRK|SSCL))2107{2108_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY));2109_c4dbgp("there is a stored key, so this tag is for the next element");2110_append_key_val_null(rem.str - 1);2111rem_flags(QMRK);2112}21132114#ifdef RYML_NO_COVERAGE__TO_BE_DELETED2115const char 
*tag_beginning = rem.str;2116#endif2117size_t tag_indentation = m_state->line_contents.current_col(t);2118_c4dbgpf("there was a tag: '{}', indentation={}", t, tag_indentation);2119_RYML_CB_ASSERT(m_stack.m_callbacks, t.end() > m_state->line_contents.rem.begin());2120_line_progressed(static_cast<size_t>(t.end() - m_state->line_contents.rem.begin()));2121{2122size_t pos = m_state->line_contents.rem.first_not_of(" \t");2123if(pos != csubstr::npos)2124_line_progressed(pos);2125}21262127if(has_all(RMAP|RKEY))2128{2129_c4dbgpf("saving map key tag '{}'", t);2130_RYML_CB_ASSERT(m_stack.m_callbacks, m_key_tag.empty());2131m_key_tag = t;2132m_key_tag_indentation = tag_indentation;2133}2134else if(has_all(RMAP|RVAL))2135{2136/* foo: !!str2137* !!str : bar */2138rem = m_state->line_contents.rem;2139rem = rem.left_of(rem.find("#"));2140rem = rem.trimr(" \t");2141_c4dbgpf("rem='{}'", rem);2142#ifdef RYML_NO_COVERAGE__TO_BE_DELETED2143if(rem == ':' || rem.begins_with(": "))2144{2145_c4dbgp("the last val was null, and this is a tag from a null key");2146_append_key_val_null(tag_beginning - 1);2147_store_scalar_null(rem.str - 1);2148// do not change the flag to key, it is ~2149_RYML_CB_ASSERT(m_stack.m_callbacks, rem.begin() > m_state->line_contents.rem.begin());2150size_t token_len = rem == ':' ? 
1 : 2;2151_line_progressed(static_cast<size_t>(token_len + rem.begin() - m_state->line_contents.rem.begin()));2152}2153#endif2154_c4dbgpf("saving map val tag '{}'", t);2155_RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty());2156m_val_tag = t;2157m_val_tag_indentation = tag_indentation;2158}2159else if(has_all(RSEQ|RVAL) || has_all(RTOP|RUNK|NDOC))2160{2161if(m_val_tag.empty())2162{2163_c4dbgpf("saving seq/doc val tag '{}'", t);2164m_val_tag = t;2165m_val_tag_indentation = tag_indentation;2166}2167else2168{2169_c4dbgpf("saving seq/doc key tag '{}'", t);2170m_key_tag = t;2171m_key_tag_indentation = tag_indentation;2172}2173}2174else if(has_all(RTOP|RUNK) || has_any(RUNK))2175{2176rem = m_state->line_contents.rem;2177rem = rem.left_of(rem.find("#"));2178rem = rem.trimr(" \t");2179if(rem.empty())2180{2181_c4dbgpf("saving val tag '{}'", t);2182_RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty());2183m_val_tag = t;2184m_val_tag_indentation = tag_indentation;2185}2186else2187{2188_c4dbgpf("saving key tag '{}'", t);2189if(m_key_tag.empty())2190{2191m_key_tag = t;2192m_key_tag_indentation = tag_indentation;2193}2194else2195{2196/* handle this case:2197* !!str foo: !!map2198* !!int 1: !!float 20.02199* !!int 3: !!float 40.02200*2201* (m_key_tag would be !!str and m_key_tag2 would be !!int)2202*/2203m_key_tag2 = t;2204m_key_tag2_indentation = tag_indentation;2205}2206}2207}2208else2209{2210_c4err("internal error");2211}22122213if(m_val_tag.not_empty())2214{2215YamlTag_e tag = to_tag(t);2216if(tag == TAG_STR)2217{2218_c4dbgpf("tag '{}' is a str-type tag", t);2219if(has_all(RTOP|RUNK|NDOC))2220{2221_c4dbgpf("docval. slurping the string. pos={}", m_state->pos.offset);2222csubstr scalar = _slurp_doc_scalar();2223_c4dbgpf("docval. after slurp: {}, at node {}: '{}'", m_state->pos.offset, m_state->node_id, scalar);2224m_tree->to_val(m_state->node_id, scalar, DOC);2225_c4dbgpf("docval. 
val tag {} -> {}", m_val_tag, normalize_tag(m_val_tag));2226m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag));2227m_val_tag.clear();2228if(!m_val_anchor.empty())2229{2230_c4dbgpf("setting val anchor[{}]='{}'", m_state->node_id, m_val_anchor);2231m_tree->set_val_anchor(m_state->node_id, m_val_anchor);2232m_val_anchor.clear();2233}2234_end_stream();2235}2236}2237}2238return true;2239}22402241//-----------------------------------------------------------------------------2242csubstr Parser::_slurp_doc_scalar()2243{2244csubstr s = m_state->line_contents.rem;2245size_t pos = m_state->pos.offset;2246_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.find("---") != csubstr::npos);2247_c4dbgpf("slurp 0 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));2248if(s.len == 0)2249{2250_line_ended();2251_scan_line();2252s = m_state->line_contents.rem;2253pos = m_state->pos.offset;2254}22552256size_t skipws = s.first_not_of(" \t");2257_c4dbgpf("slurp 1 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));2258if(skipws != npos)2259{2260_line_progressed(skipws);2261s = m_state->line_contents.rem;2262pos = m_state->pos.offset;2263_c4dbgpf("slurp 2 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));2264}22652266_RYML_CB_ASSERT(m_stack.m_callbacks, m_val_anchor.empty());2267_handle_val_anchors_and_refs();2268if(!m_val_anchor.empty())2269{2270s = m_state->line_contents.rem;2271skipws = s.first_not_of(" \t");2272if(skipws != npos)2273{2274_line_progressed(skipws);2275}2276s = m_state->line_contents.rem;2277pos = m_state->pos.offset;2278_c4dbgpf("slurp 3 '{}'. 
REM='{}'", s, m_buf.sub(m_state->pos.offset));2279}22802281if(s.begins_with('\''))2282{2283m_state->scalar_col = m_state->line_contents.current_col(s);2284return _scan_squot_scalar();2285}2286else if(s.begins_with('"'))2287{2288m_state->scalar_col = m_state->line_contents.current_col(s);2289return _scan_dquot_scalar();2290}2291else if(s.begins_with('|') || s.begins_with('>'))2292{2293return _scan_block();2294}22952296_c4dbgpf("slurp 4 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));22972298m_state->scalar_col = m_state->line_contents.current_col(s);2299_RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos);2300_line_progressed(static_cast<size_t>(s.end() - (m_buf.begin() + pos)));23012302_c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));23032304if(_at_line_end())2305{2306_c4dbgpf("at line end. curr='{}'", s);2307s = _extend_scanned_scalar(s);2308}23092310_c4dbgpf("scalar was '{}'", s);23112312return s;2313}231423152316//-----------------------------------------------------------------------------23172318bool Parser::_scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)2319{2320_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ));2321_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL));2322_RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY));2323_RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(FLOW));23242325csubstr s = m_state->line_contents.rem;2326if(s.len == 0)2327return false;2328s = s.trim(" \t");2329if(s.len == 0)2330return false;23312332if(s.begins_with('\''))2333{2334_c4dbgp("got a ': scanning single-quoted scalar");2335m_state->scalar_col = m_state->line_contents.current_col(s);2336*scalar = _scan_squot_scalar();2337*quoted = true;2338return true;2339}2340else if(s.begins_with('"'))2341{2342_c4dbgp("got a \": scanning double-quoted scalar");2343m_state->scalar_col = m_state->line_contents.current_col(s);2344*scalar = _scan_dquot_scalar();2345*quoted = true;2346return true;2347}2348else if(s.begins_with('|') || s.begins_with('>'))2349{2350*scalar = _scan_block();2351*quoted = true;2352return true;2353}2354else if(has_any(RTOP) && _is_doc_sep(s))2355{2356return false;2357}23582359_c4dbgp("RSEQ|RVAL");2360if( ! _is_scalar_next__rseq_rval(s))2361return false;2362_RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))2363return false;2364)23652366if(s.ends_with(':'))2367{2368--s.len;2369}2370else2371{2372auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #");2373if(first)2374s.len = first.pos;2375}2376s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));23772378if(s.empty())2379return false;23802381m_state->scalar_col = m_state->line_contents.current_col(s);2382_RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);2383_line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);23842385if(_at_line_end() && s != '~')2386{2387_c4dbgpf("at line end. curr='{}'", s);2388s = _extend_scanned_scalar(s);2389}23902391_c4dbgpf("scalar was '{}'", s);23922393*scalar = s;2394*quoted = false;2395return true;2396}23972398bool Parser::_scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)2399{2400_c4dbgp("_scan_scalar_map_blck");2401_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));2402_RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(FLOW));2403_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL));24042405csubstr s = m_state->line_contents.rem;2406#ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED2407if(s.len == 0)2408return false;2409#endif2410s = s.trim(" \t");2411if(s.len == 0)2412return false;24132414if(s.begins_with('\''))2415{2416_c4dbgp("got a ': scanning single-quoted scalar");2417m_state->scalar_col = m_state->line_contents.current_col(s);2418*scalar = _scan_squot_scalar();2419*quoted = true;2420return true;2421}2422else if(s.begins_with('"'))2423{2424_c4dbgp("got a \": scanning double-quoted scalar");2425m_state->scalar_col = m_state->line_contents.current_col(s);2426*scalar = _scan_dquot_scalar();2427*quoted = true;2428return true;2429}2430else if(s.begins_with('|') || s.begins_with('>'))2431{2432*scalar = _scan_block();2433*quoted = true;2434return true;2435}2436else if(has_any(RTOP) && _is_doc_sep(s))2437{2438return false;2439}24402441if( ! _is_scalar_next__rmap(s))2442return false;24432444size_t colon_token = s.find(": ");2445if(colon_token == npos)2446{2447_RYML_WITH_OR_WITHOUT_TAB_TOKENS(2448// with tab tokens2449colon_token = s.find(":\t");2450if(colon_token == npos)2451{2452_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);2453colon_token = s.find(':');2454if(colon_token != s.len-1)2455colon_token = npos;2456}2457,2458// without tab tokens2459colon_token = s.find(':');2460_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);2461if(colon_token != s.len-1)2462colon_token = npos;2463)2464}24652466if(has_all(RKEY))2467{2468_RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' '));2469if(has_any(QMRK))2470{2471_c4dbgp("RMAP|RKEY|CPLX");2472_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));2473if(s.begins_with("? 
") || s == '?')2474return false;2475s = s.left_of(colon_token);2476s = s.left_of(s.first_of("#"));2477s = s.trimr(" \t");2478if(s.begins_with("---"))2479return false;2480else if(s.begins_with("..."))2481return false;2482}2483else2484{2485_c4dbgp("RMAP|RKEY");2486_RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{'));2487if(s.begins_with("? ") || s == '?')2488return false;2489s = s.left_of(colon_token);2490s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));2491if(s.begins_with("---"))2492{2493return false;2494}2495else if(s.begins_with("..."))2496{2497return false;2498}2499}2500}2501else if(has_all(RVAL))2502{2503_c4dbgp("RMAP|RVAL");2504_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK));2505if( ! _is_scalar_next__rmap_val(s))2506return false;2507_RYML_WITH_TAB_TOKENS(2508else if(s.begins_with("-\t"))2509return false;2510)2511_c4dbgp("RMAP|RVAL: scalar");2512s = s.left_of(s.find(" #")); // is there a comment?2513s = s.left_of(s.find("\t#")); // is there a comment?2514s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));2515if(s.begins_with("---"))2516return false;2517#ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED2518else if(s.begins_with("..."))2519return false;2520#endif2521}25222523if(s.empty())2524return false;25252526m_state->scalar_col = m_state->line_contents.current_col(s);2527_RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);2528_line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);25292530if(_at_line_end() && s != '~')2531{2532_c4dbgpf("at line end. 
curr='{}'", s);2533s = _extend_scanned_scalar(s);2534}25352536_c4dbgpf("scalar was '{}'", s);25372538*scalar = s;2539*quoted = false;2540return true;2541}25422543bool Parser::_scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)2544{2545_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ));2546_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW));2547_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL));2548_RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY));25492550csubstr s = m_state->line_contents.rem;2551if(s.len == 0)2552return false;2553s = s.trim(" \t");2554if(s.len == 0)2555return false;25562557if(s.begins_with('\''))2558{2559_c4dbgp("got a ': scanning single-quoted scalar");2560m_state->scalar_col = m_state->line_contents.current_col(s);2561*scalar = _scan_squot_scalar();2562*quoted = true;2563return true;2564}2565else if(s.begins_with('"'))2566{2567_c4dbgp("got a \": scanning double-quoted scalar");2568m_state->scalar_col = m_state->line_contents.current_col(s);2569*scalar = _scan_dquot_scalar();2570*quoted = true;2571return true;2572}25732574if(has_all(RVAL))2575{2576_c4dbgp("RSEQ|RVAL");2577if( ! _is_scalar_next__rseq_rval(s))2578return false;2579_RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))2580return false;2581)2582_c4dbgp("RSEQ|RVAL|FLOW");2583s = s.left_of(s.first_of(",]"));2584if(s.ends_with(':'))2585{2586--s.len;2587}2588else2589{2590auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #");2591if(first)2592s.len = first.pos;2593}2594s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));2595}25962597if(s.empty())2598return false;25992600m_state->scalar_col = m_state->line_contents.current_col(s);2601_RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);2602_line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);26032604if(_at_line_end() && s != '~')2605{2606_c4dbgpf("at line end. 
curr='{}'", s);2607s = _extend_scanned_scalar(s);2608}26092610_c4dbgpf("scalar was '{}'", s);26112612*scalar = s;2613*quoted = false;2614return true;2615}26162617bool Parser::_scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)2618{2619_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));2620_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW));2621_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL));26222623csubstr s = m_state->line_contents.rem;2624if(s.len == 0)2625return false;2626s = s.trim(" \t");2627if(s.len == 0)2628return false;26292630if(s.begins_with('\''))2631{2632_c4dbgp("got a ': scanning single-quoted scalar");2633m_state->scalar_col = m_state->line_contents.current_col(s);2634*scalar = _scan_squot_scalar();2635*quoted = true;2636return true;2637}2638else if(s.begins_with('"'))2639{2640_c4dbgp("got a \": scanning double-quoted scalar");2641m_state->scalar_col = m_state->line_contents.current_col(s);2642*scalar = _scan_dquot_scalar();2643*quoted = true;2644return true;2645}26462647if( ! _is_scalar_next__rmap(s))2648return false;26492650if(has_all(RKEY))2651{2652_RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' '));2653size_t colon_token = s.find(": ");2654if(colon_token == npos)2655{2656_RYML_WITH_OR_WITHOUT_TAB_TOKENS(2657// with tab tokens2658colon_token = s.find(":\t");2659if(colon_token == npos)2660{2661_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);2662colon_token = s.find(':');2663if(colon_token != s.len-1)2664colon_token = npos;2665}2666,2667// without tab tokens2668colon_token = s.find(':');2669_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);2670if(colon_token != s.len-1)2671colon_token = npos;2672)2673}2674if(s.begins_with("? 
") || s == '?')2675return false;2676if(has_any(QMRK))2677{2678_c4dbgp("RMAP|RKEY|CPLX");2679_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));2680s = s.left_of(colon_token);2681s = s.left_of(s.first_of("#"));2682s = s.left_of(s.first_of(':'));2683s = s.trimr(" \t");2684if(s.begins_with("---"))2685return false;2686else if(s.begins_with("..."))2687return false;2688}2689else2690{2691_RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{'));2692_c4dbgp("RMAP|RKEY");2693s = s.left_of(colon_token);2694s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));2695_c4dbgpf("RMAP|RKEY|FLOW: '{}'", s);2696s = s.left_of(s.first_of(",}"));2697if(s.ends_with(':'))2698--s.len;2699}2700}2701else if(has_all(RVAL))2702{2703_c4dbgp("RMAP|RVAL");2704_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK));2705if( ! _is_scalar_next__rmap_val(s))2706return false;2707_RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))2708return false;2709)2710_c4dbgp("RMAP|RVAL|FLOW");2711if(has_none(RSEQIMAP))2712s = s.left_of(s.first_of(",}"));2713else2714s = s.left_of(s.first_of(",]"));2715s = s.left_of(s.find(" #")); // is there a comment?2716s = s.left_of(s.find("\t#")); // is there a comment?2717s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));2718}27192720if(s.empty())2721return false;27222723m_state->scalar_col = m_state->line_contents.current_col(s);2724_RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);2725_line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);27262727if(_at_line_end() && s != '~')2728{2729_c4dbgpf("at line end. 
curr='{}'", s);2730s = _extend_scanned_scalar(s);2731}27322733_c4dbgpf("scalar was '{}'", s);27342735*scalar = s;2736*quoted = false;2737return true;2738}27392740bool Parser::_scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)2741{2742_RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RUNK));27432744csubstr s = m_state->line_contents.rem;2745if(s.len == 0)2746return false;2747s = s.trim(" \t");2748if(s.len == 0)2749return false;27502751if(s.begins_with('\''))2752{2753_c4dbgp("got a ': scanning single-quoted scalar");2754m_state->scalar_col = m_state->line_contents.current_col(s);2755*scalar = _scan_squot_scalar();2756*quoted = true;2757return true;2758}2759else if(s.begins_with('"'))2760{2761_c4dbgp("got a \": scanning double-quoted scalar");2762m_state->scalar_col = m_state->line_contents.current_col(s);2763*scalar = _scan_dquot_scalar();2764*quoted = true;2765return true;2766}2767else if(s.begins_with('|') || s.begins_with('>'))2768{2769*scalar = _scan_block();2770*quoted = true;2771return true;2772}2773else if(has_any(RTOP) && _is_doc_sep(s))2774{2775return false;2776}27772778_c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s);2779if( ! 
_is_scalar_next__runk(s))2780{2781_c4dbgp("RUNK: no scalar next");2782return false;2783}2784size_t pos = s.find(" #");2785if(pos != npos)2786{2787_c4dbgpf("RUNK: found ' #' at {}", pos);2788s = s.left_of(pos);2789}2790pos = s.find(": ");2791if(pos != npos)2792{2793_c4dbgpf("RUNK: found ': ' at {}", pos);2794s = s.left_of(pos);2795}2796else if(s.ends_with(':'))2797{2798_c4dbgp("RUNK: ends with ':'");2799s = s.left_of(s.len-1);2800}2801_RYML_WITH_TAB_TOKENS(2802else if((pos = s.find(":\t")) != npos) // TABS2803{2804_c4dbgp("RUNK: ends with ':\\t'");2805s = s.left_of(pos);2806})2807else2808{2809_c4dbgp("RUNK: trimming left of ,");2810s = s.left_of(s.first_of(','));2811}2812s = s.trim(" \t");2813_c4dbgpf("RUNK: scalar=[{}]~~~{}~~~", s.len, s);28142815if(s.empty())2816return false;28172818m_state->scalar_col = m_state->line_contents.current_col(s);2819_RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);2820_line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);28212822if(_at_line_end() && s != '~')2823{2824_c4dbgpf("at line end. curr=[{}]~~~{}~~", s.len, s);2825s = _extend_scanned_scalar(s);2826}28272828_c4dbgpf("scalar was [{}]~~~{}~~~", s.len, s);28292830*scalar = s;2831*quoted = false;2832return true;2833}283428352836//-----------------------------------------------------------------------------28372838csubstr Parser::_extend_scanned_scalar(csubstr s)2839{2840if(has_all(RMAP|RKEY|QMRK))2841{2842size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col;2843_c4dbgpf("extend_scalar: explicit key! 
indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col);2844csubstr n = _scan_to_next_nonempty_line(scalar_indentation);2845if(!n.empty())2846{2847substr full = _scan_complex_key(s, n).trimr(" \t\r\n");2848if(full != s)2849s = _filter_plain_scalar(full, scalar_indentation);2850}2851}2852// deal with plain (unquoted) scalars that continue to the next line2853else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference)2854{2855_c4dbgpf("extend_scalar: line ended, scalar='{}'", s);2856if(has_none(FLOW))2857{2858size_t scalar_indentation = m_state->indref + 1;2859if(has_all(RUNK) && scalar_indentation == 1)2860scalar_indentation = 0;2861csubstr n = _scan_to_next_nonempty_line(scalar_indentation);2862if(!n.empty())2863{2864_c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation);2865_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n));2866substr full = _scan_plain_scalar_blck(s, n, scalar_indentation);2867if(full.len >= s.len)2868s = _filter_plain_scalar(full, scalar_indentation);2869}2870}2871else2872{2873_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));2874csubstr n = _scan_to_next_nonempty_line(/*indentation*/0);2875if(!n.empty())2876{2877_c4dbgp("rscalar[FLOW]");2878substr full = _scan_plain_scalar_flow(s, n);2879s = _filter_plain_scalar(full, /*indentation*/0);2880}2881}2882}28832884return s;2885}288628872888//-----------------------------------------------------------------------------28892890substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line)2891{2892static constexpr const csubstr chars = "[]{}?#,";2893size_t pos = peeked_line.first_of(chars);2894bool first = true;2895while(pos != 0)2896{2897if(has_all(RMAP|RKEY) || has_any(RUNK))2898{2899csubstr tpkl = peeked_line.triml(' 
').trimr("\r\n");2900if(tpkl.begins_with(": ") || tpkl == ':')2901{2902_c4dbgpf("rscalar[FLOW]: map value starts on the peeked line: '{}'", peeked_line);2903peeked_line = peeked_line.first(0);2904break;2905}2906else2907{2908auto colon_pos = peeked_line.first_of_any(": ", ":");2909if(colon_pos && colon_pos.pos < pos)2910{2911peeked_line = peeked_line.first(colon_pos.pos);2912_c4dbgpf("rscalar[FLOW]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line);2913_RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());2914_line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));2915break;2916}2917}2918}2919if(pos != npos)2920{2921_c4dbgpf("rscalar[FLOW]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n"));2922peeked_line = peeked_line.left_of(pos);2923_RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());2924_line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));2925break;2926}2927_c4dbgpf("rscalar[FLOW]: append another line, full: '{}'", peeked_line.trimr("\r\n"));2928if(!first)2929{2930RYML_CHECK(_advance_to_peeked());2931}2932peeked_line = _scan_to_next_nonempty_line(/*indentation*/0);2933if(peeked_line.empty())2934{2935_c4err("expected token or continuation");2936}2937pos = peeked_line.first_of(chars);2938first = false;2939}2940substr full(m_buf.str + (currscalar.str - m_buf.str), m_buf.begin() + m_state->pos.offset);2941full = full.trimr("\n\r ");2942return full;2943}294429452946//-----------------------------------------------------------------------------29472948substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation)2949{2950_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar));2951// NOTE. 
there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice2952// size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar2953_RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin());2954size_t offs = static_cast<size_t>(currscalar.end() - m_buf.begin());2955_RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.begins_with(' ', indentation));2956while(true)2957{2958_c4dbgpf("rscalar[IMPL]: continuing... ref_indentation={}", indentation);2959if(peeked_line.begins_with("...") || peeked_line.begins_with("---"))2960{2961_c4dbgpf("rscalar[IMPL]: document termination next -- bail now '{}'", peeked_line.trimr("\r\n"));2962break;2963}2964else if(( ! peeked_line.begins_with(' ', indentation))) // is the line deindented?2965{2966if(!peeked_line.trim(" \r\n\t").empty()) // is the line not blank?2967{2968_c4dbgpf("rscalar[IMPL]: deindented line, not blank -- bail now '{}'", peeked_line.trimr("\r\n"));2969break;2970}2971_c4dbgpf("rscalar[IMPL]: line is blank and has less indentation: ref={} line={}: '{}'", indentation, peeked_line.first_not_of(' ') == csubstr::npos ? 0 : peeked_line.first_not_of(' '), peeked_line.trimr("\r\n"));2972_c4dbgpf("rscalar[IMPL]: ... searching for a line starting at indentation {}", indentation);2973csubstr next_peeked = _scan_to_next_nonempty_line(indentation);2974if(next_peeked.empty())2975{2976_c4dbgp("rscalar[IMPL]: ... finished.");2977break;2978}2979_c4dbgp("rscalar[IMPL]: ... 
continuing.");2980peeked_line = next_peeked;2981}29822983_c4dbgpf("rscalar[IMPL]: line contents: '{}'", peeked_line.right_of(indentation, true).trimr("\r\n"));2984size_t token_pos;2985if(peeked_line.find(": ") != npos)2986{2987_line_progressed(peeked_line.find(": "));2988_c4err("': ' is not a valid token in plain flow (unquoted) scalars");2989}2990else if(peeked_line.ends_with(':'))2991{2992_line_progressed(peeked_line.find(':'));2993_c4err("lines cannot end with ':' in plain flow (unquoted) scalars");2994}2995else if((token_pos = peeked_line.find(" #")) != npos)2996{2997_line_progressed(token_pos);2998break;2999//_c4err("' #' is not a valid token in plain flow (unquoted) scalars");3000}30013002_c4dbgpf("rscalar[IMPL]: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n"));3003if(!_advance_to_peeked())3004{3005_c4dbgp("rscalar[IMPL]: file finishes after the scalar");3006break;3007}3008peeked_line = m_state->line_contents.rem;3009}3010_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs);3011substr full(m_buf.str + (currscalar.str - m_buf.str),3012currscalar.len + (m_state->pos.offset - offs));3013full = full.trimr("\r\n ");3014return full;3015}30163017substr Parser::_scan_complex_key(csubstr currscalar, csubstr peeked_line)3018{3019_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar));3020// NOTE. 
there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice3021// size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar3022_RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin());3023size_t offs = static_cast<size_t>(currscalar.end() - m_buf.begin());3024while(true)3025{3026_c4dbgp("rcplxkey: continuing...");3027if(peeked_line.begins_with("...") || peeked_line.begins_with("---"))3028{3029_c4dbgpf("rcplxkey: document termination next -- bail now '{}'", peeked_line.trimr("\r\n"));3030break;3031}3032else3033{3034size_t pos = peeked_line.first_of("?:[]{}");3035if(pos == csubstr::npos)3036{3037pos = peeked_line.find("- ");3038}3039if(pos != csubstr::npos)3040{3041_c4dbgpf("rcplxkey: found special characters at pos={}: '{}'", pos, peeked_line.trimr("\r\n"));3042_line_progressed(pos);3043break;3044}3045}30463047_c4dbgpf("rcplxkey: no special chars found '{}'", peeked_line.trimr("\r\n"));3048csubstr next_peeked = _scan_to_next_nonempty_line(0);3049if(next_peeked.empty())3050{3051_c4dbgp("rcplxkey: empty ... finished.");3052break;3053}3054_c4dbgp("rcplxkey: ... 
continuing.");3055peeked_line = next_peeked;30563057_c4dbgpf("rcplxkey: line contents: '{}'", peeked_line.trimr("\r\n"));3058size_t colpos;3059if((colpos = peeked_line.find(": ")) != npos)3060{3061_c4dbgp("rcplxkey: found ': ', stopping.");3062_line_progressed(colpos);3063break;3064}3065#ifdef RYML_NO_COVERAGE__TO_BE_DELETED3066else if((colpos = peeked_line.ends_with(':')))3067{3068_c4dbgp("rcplxkey: ends with ':', stopping.");3069_line_progressed(colpos);3070break;3071}3072#endif3073_c4dbgpf("rcplxkey: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n"));3074if(!_advance_to_peeked())3075{3076_c4dbgp("rcplxkey: file finishes after the scalar");3077break;3078}3079peeked_line = m_state->line_contents.rem;3080}3081_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs);3082substr full(m_buf.str + (currscalar.str - m_buf.str),3083currscalar.len + (m_state->pos.offset - offs));3084return full;3085}30863087//! scans to the next non-blank line starting with the given indentation3088csubstr Parser::_scan_to_next_nonempty_line(size_t indentation)3089{3090csubstr next_peeked;3091while(true)3092{3093_c4dbgpf("rscalar: ... curr offset: {} indentation={}", m_state->pos.offset, indentation);3094next_peeked = _peek_next_line(m_state->pos.offset);3095csubstr next_peeked_triml = next_peeked.triml(' ');3096_c4dbgpf("rscalar: ... next peeked line='{}'", next_peeked.trimr("\r\n"));3097if(next_peeked_triml.begins_with('#'))3098{3099_c4dbgp("rscalar: ... first non-space character is #");3100return {};3101}3102else if(next_peeked.begins_with(' ', indentation))3103{3104_c4dbgpf("rscalar: ... begins at same indentation {}, assuming continuation", indentation);3105_advance_to_peeked();3106return next_peeked;3107}3108else // check for de-indentation3109{3110csubstr trimmed = next_peeked_triml.trimr("\t\r\n");3111_c4dbgpf("rscalar: ... deindented! trimmed='{}'", trimmed);3112if(!trimmed.empty())3113{3114_c4dbgp("rscalar: ... and not empty. 
bailing out.");3115return {};3116}3117}3118if(!_advance_to_peeked())3119{3120_c4dbgp("rscalar: file finished");3121return {};3122}3123}3124return {};3125}31263127// returns false when the file finished3128bool Parser::_advance_to_peeked()3129{3130_line_progressed(m_state->line_contents.rem.len);3131_line_ended(); // advances to the peeked-at line, consuming all remaining (probably newline) characters on the current line3132_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.first_of("\r\n") == csubstr::npos);3133_c4dbgpf("advance to peeked: scan more... pos={} len={}", m_state->pos.offset, m_buf.len);3134_scan_line(); // puts the peeked-at line in the buffer3135if(_finished_file())3136{3137_c4dbgp("rscalar: finished file!");3138return false;3139}3140return true;3141}31423143//-----------------------------------------------------------------------------31443145C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)3146{3147return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');3148}31493150//! look for the next newline chars, and jump to the right of those3151csubstr from_next_line(csubstr rem)3152{3153size_t nlpos = rem.first_of("\r\n");3154if(nlpos == csubstr::npos)3155return {};3156const char nl = rem[nlpos];3157rem = rem.right_of(nlpos);3158if(rem.empty())3159return {};3160if(_extend_from_combined_newline(nl, rem.front()))3161rem = rem.sub(1);3162return rem;3163}31643165csubstr Parser::_peek_next_line(size_t pos) const3166{3167csubstr rem{}; // declare here because of the goto3168size_t nlpos{}; // declare here because of the goto3169pos = pos == npos ? 
m_state->pos.offset : pos;3170if(pos >= m_buf.len)3171goto next_is_empty;31723173// look for the next newline chars, and jump to the right of those3174rem = from_next_line(m_buf.sub(pos));3175if(rem.empty())3176goto next_is_empty;31773178// now get everything up to and including the following newline chars3179nlpos = rem.first_of("\r\n");3180if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))3181nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);3182rem = rem.left_of(nlpos, /*include_pos*/true);31833184_c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));3185return rem;31863187next_is_empty:3188_c4dbgpf("peek next line @ {}: (len=0)''", pos);3189return {};3190}319131923193//-----------------------------------------------------------------------------3194void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset)3195{3196RYML_ASSERT(offset <= buf.len);3197char const* C4_RESTRICT b = &buf[offset];3198char const* C4_RESTRICT e = b;3199// get the current line stripped of newline chars3200while(e < buf.end() && (*e != '\n' && *e != '\r'))3201++e;3202RYML_ASSERT(e >= b);3203const csubstr stripped_ = buf.sub(offset, static_cast<size_t>(e - b));3204// advance pos to include the first line ending3205if(e != buf.end() && *e == '\r')3206++e;3207if(e != buf.end() && *e == '\n')3208++e;3209RYML_ASSERT(e >= b);3210const csubstr full_ = buf.sub(offset, static_cast<size_t>(e - b));3211reset(full_, stripped_);3212}32133214void Parser::_scan_line()3215{3216if(m_state->pos.offset >= m_buf.len)3217{3218m_state->line_contents.reset(m_buf.last(0), m_buf.last(0));3219return;3220}3221m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset);3222}322332243225//-----------------------------------------------------------------------------3226void Parser::_line_progressed(size_t ahead)3227{3228_c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, 
ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead);3229m_state->pos.offset += ahead;3230m_state->pos.col += ahead;3231_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1);3232m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead);3233}32343235void Parser::_line_ended()3236{3237_c4dbgpf("line[{}] ({} cols) ended! offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len);3238_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len+1);3239m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len;3240++m_state->pos.line;3241m_state->pos.col = 1;3242}32433244void Parser::_line_ended_undo()3245{3246_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u);3247_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u);3248_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len);3249size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len;3250_c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta);3251m_state->pos.offset -= delta;3252--m_state->pos.line;3253m_state->pos.col = m_state->line_contents.stripped.len + 1u;3254// don't forget to undo also the changes to the remainder of the line3255_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r');3256m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0);3257}325832593260//-----------------------------------------------------------------------------3261void Parser::_set_indentation(size_t indentation)3262{3263m_state->indref = indentation;3264_c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref);3265}32663267void Parser::_save_indentation(size_t behind)3268{3269_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin());3270m_state->indref = static_cast<size_t>(m_state->line_contents.rem.begin() - m_state->line_contents.full.begin());3271_RYML_CB_ASSERT(m_stack.m_callbacks, behind <= m_state->indref);3272m_state->indref -= behind;3273_c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref);3274}32753276bool Parser::_maybe_set_indentation_from_anchor_or_tag()3277{3278if(m_key_anchor.not_empty())3279{3280_c4dbgpf("set indentation from key anchor: {}", m_key_anchor_indentation);3281_set_indentation(m_key_anchor_indentation); // this is the column where the anchor starts3282return true;3283}3284else if(m_key_tag.not_empty())3285{3286_c4dbgpf("set indentation from key tag: {}", m_key_tag_indentation);3287_set_indentation(m_key_tag_indentation); // this is the column where the tag starts3288return true;3289}3290return false;3291}329232933294//-----------------------------------------------------------------------------3295void Parser::_write_key_anchor(size_t 
node_id)3296{3297_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_key(node_id));3298if( ! m_key_anchor.empty())3299{3300_c4dbgpf("node={}: set key anchor to '{}'", node_id, m_key_anchor);3301m_tree->set_key_anchor(node_id, m_key_anchor);3302m_key_anchor.clear();3303m_key_anchor_was_before = false;3304m_key_anchor_indentation = 0;3305}3306else if( ! m_tree->is_key_quoted(node_id))3307{3308csubstr r = m_tree->key(node_id);3309if(r.begins_with('*'))3310{3311_c4dbgpf("node={}: set key reference: '{}'", node_id, r);3312m_tree->set_key_ref(node_id, r.sub(1));3313}3314else if(r == "<<")3315{3316m_tree->set_key_ref(node_id, r);3317_c4dbgpf("node={}: it's an inheriting reference", node_id);3318if(m_tree->is_seq(node_id))3319{3320_c4dbgpf("node={}: inheriting from seq of {}", node_id, m_tree->num_children(node_id));3321for(size_t i = m_tree->first_child(node_id); i != NONE; i = m_tree->next_sibling(i))3322{3323if( ! (m_tree->val(i).begins_with('*')))3324_c4err("malformed reference: '{}'", m_tree->val(i));3325}3326}3327else if( ! m_tree->val(node_id).begins_with('*'))3328{3329_c4err("malformed reference: '{}'", m_tree->val(node_id));3330}3331//m_tree->set_key_ref(node_id, r);3332}3333}3334}33353336//-----------------------------------------------------------------------------3337void Parser::_write_val_anchor(size_t node_id)3338{3339if( ! m_val_anchor.empty())3340{3341_c4dbgpf("node={}: set val anchor to '{}'", node_id, m_val_anchor);3342m_tree->set_val_anchor(node_id, m_val_anchor);3343m_val_anchor.clear();3344}3345csubstr r = m_tree->has_val(node_id) ? m_tree->val(node_id) : "";3346if(!m_tree->is_val_quoted(node_id) && r.begins_with('*'))3347{3348_c4dbgpf("node={}: set val reference: '{}'", node_id, r);3349RYML_CHECK(!m_tree->has_val_anchor(node_id));3350m_tree->set_val_ref(node_id, r.sub(1));3351}3352}33533354//-----------------------------------------------------------------------------3355void Parser::_push_level(bool explicit_flow_chars)3356{3357_c4dbgpf("pushing level! 
currnode={} currlevel={} stacksize={} stackcap={}", m_state->node_id, m_state->level, m_stack.size(), m_stack.capacity());3358_RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top());3359if(node(m_state) == nullptr)3360{3361_c4dbgp("pushing level! actually no, current node is null");3362//_RYML_CB_ASSERT(m_stack.m_callbacks, ! explicit_flow_chars);3363return;3364}3365flag_t st = RUNK;3366if(explicit_flow_chars || has_all(FLOW))3367{3368st |= FLOW;3369}3370m_stack.push_top();3371m_state = &m_stack.top();3372set_flags(st);3373m_state->node_id = (size_t)NONE;3374m_state->indref = (size_t)NONE;3375++m_state->level;3376_c4dbgpf("pushing level: now, currlevel={}", m_state->level);3377}33783379void Parser::_pop_level()3380{3381_c4dbgpf("popping level! currnode={} currlevel={}", m_state->node_id, m_state->level);3382if(has_any(RMAP) || m_tree->is_map(m_state->node_id))3383{3384_stop_map();3385}3386if(has_any(RSEQ) || m_tree->is_seq(m_state->node_id))3387{3388_stop_seq();3389}3390if(m_tree->is_doc(m_state->node_id))3391{3392_stop_doc();3393}3394_RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1);3395_prepare_pop();3396m_stack.pop();3397m_state = &m_stack.top();3398/*if(has_any(RMAP))3399{3400_toggle_key_val();3401}*/3402if(m_state->line_contents.indentation == 0)3403{3404//_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RTOP));3405add_flags(RTOP);3406}3407_c4dbgpf("popping level: now, currnode={} currlevel={}", m_state->node_id, m_state->level);3408}34093410//-----------------------------------------------------------------------------3411void Parser::_start_unk(bool /*as_child*/)3412{3413_c4dbgp("start_unk");3414_push_level();3415_move_scalar_from_top();3416}34173418//-----------------------------------------------------------------------------3419void Parser::_start_doc(bool as_child)3420{3421_c4dbgpf("start_doc (as child={})", as_child);3422_RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id));3423size_t parent_id = m_stack.size() < 2 
? m_root_id : m_stack.top(1).node_id;3424_RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);3425_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_root(parent_id));3426_RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id));3427if(as_child)3428{3429_c4dbgpf("start_doc: parent={}", parent_id);3430if( ! m_tree->is_stream(parent_id))3431{3432_c4dbgp("start_doc: rearranging with root as STREAM");3433m_tree->set_root_as_stream();3434}3435m_state->node_id = m_tree->append_child(parent_id);3436m_tree->to_doc(m_state->node_id);3437}3438#ifdef RYML_NO_COVERAGE__TO_BE_DELETED3439else3440{3441_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(parent_id) || m_tree->empty(parent_id));3442m_state->node_id = parent_id;3443if( ! m_tree->is_doc(parent_id))3444{3445m_tree->to_doc(parent_id, DOC);3446}3447}3448#endif3449_c4dbgpf("start_doc: id={}", m_state->node_id);3450add_flags(RUNK|RTOP|NDOC);3451_handle_types();3452rem_flags(NDOC);3453}34543455void Parser::_stop_doc()3456{3457size_t doc_node = m_state->node_id;3458_c4dbgpf("stop_doc[{}]", doc_node);3459_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_doc(doc_node));3460if(!m_tree->is_seq(doc_node) && !m_tree->is_map(doc_node) && !m_tree->is_val(doc_node))3461{3462_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL));3463_c4dbgpf("stop_doc[{}]: there was nothing; adding null val", doc_node);3464m_tree->to_val(doc_node, {}, DOC);3465}3466}34673468void Parser::_end_stream()3469{3470_c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id);3471_RYML_CB_ASSERT(m_stack.m_callbacks, ! 
m_stack.empty());3472NodeData *added = nullptr;3473if(has_any(SSCL))3474{3475if(m_tree->is_seq(m_state->node_id))3476{3477_c4dbgp("append val...");3478added = _append_val(_consume_scalar());3479}3480else if(m_tree->is_map(m_state->node_id))3481{3482_c4dbgp("append null key val...");3483added = _append_key_val_null(m_state->line_contents.rem.str);3484#ifdef RYML_NO_COVERAGE__TO_BE_DELETED3485if(has_any(RSEQIMAP))3486{3487_stop_seqimap();3488_pop_level();3489}3490#endif3491}3492else if(m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE)3493{3494NodeType_e quoted = has_any(QSCL) ? VALQUO : NOTYPE; // do this before consuming the scalar3495csubstr scalar = _consume_scalar();3496_c4dbgpf("node[{}]: to docval '{}'{}", m_state->node_id, scalar, quoted == VALQUO ? ", quoted" : "");3497m_tree->to_val(m_state->node_id, scalar, DOC|quoted);3498added = m_tree->get(m_state->node_id);3499}3500else3501{3502_c4err("internal error");3503}3504}3505else if(has_all(RSEQ|RVAL) && has_none(FLOW))3506{3507_c4dbgp("add last...");3508added = _append_val_null(m_state->line_contents.rem.str);3509}3510else if(!m_val_tag.empty() && (m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE))3511{3512csubstr scalar = m_state->line_contents.rem.first(0);3513_c4dbgpf("node[{}]: add null scalar as docval", m_state->node_id);3514m_tree->to_val(m_state->node_id, scalar, DOC);3515added = m_tree->get(m_state->node_id);3516}35173518if(added)3519{3520size_t added_id = m_tree->id(added);3521if(m_tree->is_seq(m_state->node_id) || m_tree->is_doc(m_state->node_id))3522{3523if(!m_key_anchor.empty())3524{3525_c4dbgpf("node[{}]: move key to val anchor: '{}'", added_id, m_key_anchor);3526m_val_anchor = m_key_anchor;3527m_key_anchor = {};3528}3529if(!m_key_tag.empty())3530{3531_c4dbgpf("node[{}]: move key to val tag: '{}'", added_id, m_key_tag);3532m_val_tag = m_key_tag;3533m_key_tag = {};3534}3535}3536#ifdef 
RYML_NO_COVERAGE__TO_BE_DELETED3537if(!m_key_anchor.empty())3538{3539_c4dbgpf("node[{}]: set key anchor='{}'", added_id, m_key_anchor);3540m_tree->set_key_anchor(added_id, m_key_anchor);3541m_key_anchor = {};3542}3543#endif3544if(!m_val_anchor.empty())3545{3546_c4dbgpf("node[{}]: set val anchor='{}'", added_id, m_val_anchor);3547m_tree->set_val_anchor(added_id, m_val_anchor);3548m_val_anchor = {};3549}3550#ifdef RYML_NO_COVERAGE__TO_BE_DELETED3551if(!m_key_tag.empty())3552{3553_c4dbgpf("node[{}]: set key tag='{}' -> '{}'", added_id, m_key_tag, normalize_tag(m_key_tag));3554m_tree->set_key_tag(added_id, normalize_tag(m_key_tag));3555m_key_tag = {};3556}3557#endif3558if(!m_val_tag.empty())3559{3560_c4dbgpf("node[{}]: set val tag='{}' -> '{}'", added_id, m_val_tag, normalize_tag(m_val_tag));3561m_tree->set_val_tag(added_id, normalize_tag(m_val_tag));3562m_val_tag = {};3563}3564}35653566while(m_stack.size() > 1)3567{3568_c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size());3569_RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL, &m_stack.top()));3570if(has_all(RSEQ|FLOW))3571_err("closing ] not found");3572_pop_level();3573}3574add_flags(NDOC);3575}35763577void Parser::_start_new_doc(csubstr rem)3578{3579_c4dbgp("_start_new_doc");3580_RYML_CB_ASSERT(m_stack.m_callbacks, rem.begins_with("---"));3581C4_UNUSED(rem);35823583_end_stream();35843585size_t indref = m_state->indref;3586_c4dbgpf("start a document, indentation={}", indref);3587_line_progressed(3);3588_push_level();3589_start_doc();3590_set_indentation(indref);3591}359235933594//-----------------------------------------------------------------------------3595void Parser::_start_map(bool as_child)3596{3597_c4dbgpf("start_map (as child={})", as_child);3598addrem_flags(RMAP|RVAL, RKEY|RUNK);3599_RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id));3600size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id;3601_RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);3602_RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id));3603if(as_child)3604{3605m_state->node_id = m_tree->append_child(parent_id);3606if(has_all(SSCL))3607{3608type_bits key_quoted = NOTYPE;3609if(m_state->flags & QSCL) // before consuming the scalar3610key_quoted |= KEYQUO;3611csubstr key = _consume_scalar();3612m_tree->to_map(m_state->node_id, key, key_quoted);3613_c4dbgpf("start_map: id={} key='{}'", m_state->node_id, m_tree->key(m_state->node_id));3614_write_key_anchor(m_state->node_id);3615if( ! m_key_tag.empty())3616{3617_c4dbgpf("node[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag));3618m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag));3619m_key_tag.clear();3620}3621}3622else3623{3624m_tree->to_map(m_state->node_id);3625_c4dbgpf("start_map: id={}", m_state->node_id);3626}3627m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str;3628_write_val_anchor(m_state->node_id);3629}3630else3631{3632_RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);3633m_state->node_id = parent_id;3634_c4dbgpf("start_map: id={}", m_state->node_id);3635type_bits as_doc = 0;3636if(m_tree->is_doc(m_state->node_id))3637as_doc |= DOC;3638if(!m_tree->is_map(parent_id))3639{3640RYML_CHECK(!m_tree->has_children(parent_id));3641m_tree->to_map(parent_id, as_doc);3642}3643else3644{3645m_tree->_add_flags(parent_id, as_doc);3646}3647_move_scalar_from_top();3648if(m_key_anchor.not_empty())3649m_key_anchor_was_before = true;3650_write_val_anchor(parent_id);3651if(m_stack.size() >= 2)3652{3653State const& parent_state = m_stack.top(1);3654if(parent_state.flags & RSET)3655add_flags(RSET);3656}3657m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str;3658}3659if( ! 
m_val_tag.empty())3660{3661_c4dbgpf("node[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag));3662m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag));3663m_val_tag.clear();3664}3665}36663667void Parser::_start_map_unk(bool as_child)3668{3669_c4dbgpf("start_map_unk (as child={})", as_child);3670if(!m_key_anchor_was_before)3671{3672_c4dbgpf("stash key anchor before starting map... '{}'", m_key_anchor);3673csubstr ka = m_key_anchor;3674m_key_anchor = {};3675_start_map(as_child);3676m_key_anchor = ka;3677}3678else3679{3680_start_map(as_child);3681m_key_anchor_was_before = false;3682}3683if(m_key_tag2.not_empty())3684{3685m_key_tag = m_key_tag2;3686m_key_tag_indentation = m_key_tag2_indentation;3687m_key_tag2.clear();3688m_key_tag2_indentation = 0;3689}3690}36913692void Parser::_stop_map()3693{3694_c4dbgpf("stop_map[{}]", m_state->node_id);3695_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id));3696if(has_all(QMRK|RKEY) && !has_all(SSCL))3697{3698_c4dbgpf("stop_map[{}]: RKEY", m_state->node_id);3699_store_scalar_null(m_state->line_contents.rem.str);3700_append_key_val_null(m_state->line_contents.rem.str);3701}3702}370337043705//-----------------------------------------------------------------------------3706void Parser::_start_seq(bool as_child)3707{3708_c4dbgpf("start_seq (as child={})", as_child);3709if(has_all(RTOP|RUNK))3710{3711_c4dbgpf("start_seq: moving key tag to val tag: '{}'", m_key_tag);3712m_val_tag = m_key_tag;3713m_key_tag.clear();3714}3715addrem_flags(RSEQ|RVAL, RUNK);3716_RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id));3717size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id;3718_RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);3719_RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id));3720if(as_child)3721{3722m_state->node_id = m_tree->append_child(parent_id);3723if(has_all(SSCL))3724{3725_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(parent_id));3726type_bits key_quoted = 0;3727if(m_state->flags & QSCL) // before consuming the scalar3728key_quoted |= KEYQUO;3729csubstr key = _consume_scalar();3730m_tree->to_seq(m_state->node_id, key, key_quoted);3731_c4dbgpf("start_seq: id={} name='{}'", m_state->node_id, m_tree->key(m_state->node_id));3732_write_key_anchor(m_state->node_id);3733if( ! m_key_tag.empty())3734{3735_c4dbgpf("start_seq[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag));3736m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag));3737m_key_tag.clear();3738}3739}3740else3741{3742type_bits as_doc = 0;3743_RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_doc(m_state->node_id));3744m_tree->to_seq(m_state->node_id, as_doc);3745_c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as doc" : "");3746}3747_write_val_anchor(m_state->node_id);3748m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str;3749}3750else3751{3752m_state->node_id = parent_id;3753type_bits as_doc = 0;3754if(m_tree->is_doc(m_state->node_id))3755as_doc |= DOC;3756if(!m_tree->is_seq(parent_id))3757{3758RYML_CHECK(!m_tree->has_children(parent_id));3759m_tree->to_seq(parent_id, as_doc);3760}3761else3762{3763m_tree->_add_flags(parent_id, as_doc);3764}3765_move_scalar_from_top();3766_c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as_doc" : "");3767_write_val_anchor(parent_id);3768m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str;3769}3770if( ! 
m_val_tag.empty())3771{3772_c4dbgpf("start_seq[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag));3773m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag));3774m_val_tag.clear();3775}3776}37773778void Parser::_stop_seq()3779{3780_c4dbgp("stop_seq");3781_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id));3782}378337843785//-----------------------------------------------------------------------------3786void Parser::_start_seqimap()3787{3788_c4dbgpf("start_seqimap at node={}. has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id));3789_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW));3790// create a map, and turn the last scalar of this sequence3791// into the key of the map's first child. This scalar was3792// understood to be a value in the sequence, but it is3793// actually a key of a map, implicitly opened here.3794// Eg [val, key: val]3795//3796// Yep, YAML is crazy.3797if(m_tree->has_children(m_state->node_id) && m_tree->has_val(m_tree->last_child(m_state->node_id)))3798{3799size_t prev = m_tree->last_child(m_state->node_id);3800NodeType ty = m_tree->_p(prev)->m_type; // don't use type() because it masks out the quotes3801NodeScalar tmp = m_tree->valsc(prev);3802_c4dbgpf("has children and last child={} has val. 
saving the scalars, val='{}' quoted={}", prev, tmp.scalar, ty.is_val_quoted());3803m_tree->remove(prev);3804_push_level();3805_start_map();3806_store_scalar(tmp.scalar, ty.is_val_quoted());3807m_key_anchor = tmp.anchor;3808m_key_tag = tmp.tag;3809}3810else3811{3812_c4dbgpf("node {} has no children yet, using empty key", m_state->node_id);3813_push_level();3814_start_map();3815_store_scalar_null(m_state->line_contents.rem.str);3816}3817add_flags(RSEQIMAP|FLOW);3818}38193820void Parser::_stop_seqimap()3821{3822_c4dbgp("stop_seqimap");3823_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQIMAP));3824}382538263827//-----------------------------------------------------------------------------3828NodeData* Parser::_append_val(csubstr val, flag_t quoted)3829{3830_RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(SSCL));3831_RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) != nullptr);3832_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id));3833type_bits additional_flags = quoted ? VALQUO : NOTYPE;3834_c4dbgpf("append val: '{}' to parent id={} (level={}){}", val, m_state->node_id, m_state->level, quoted ? " VALQUO!" : "");3835size_t nid = m_tree->append_child(m_state->node_id);3836m_tree->to_val(nid, val, additional_flags);3837_c4dbgpf("append val: id={} val='{}'", nid, m_tree->get(nid)->m_val.scalar);3838if( ! 
m_val_tag.empty())3839{3840_c4dbgpf("append val[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag));3841m_tree->set_val_tag(nid, normalize_tag(m_val_tag));3842m_val_tag.clear();3843}3844_write_val_anchor(nid);3845return m_tree->get(nid);3846}38473848NodeData* Parser::_append_key_val(csubstr val, flag_t val_quoted)3849{3850_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id));3851type_bits additional_flags = 0;3852if(m_state->flags & QSCL)3853additional_flags |= KEYQUO;3854if(val_quoted)3855additional_flags |= VALQUO;3856csubstr key = _consume_scalar();3857_c4dbgpf("append keyval: '{}' '{}' to parent id={} (level={}){}{}", key, val, m_state->node_id, m_state->level, (additional_flags & KEYQUO) ? " KEYQUO!" : "", (additional_flags & VALQUO) ? " VALQUO!" : "");3858size_t nid = m_tree->append_child(m_state->node_id);3859m_tree->to_keyval(nid, key, val, additional_flags);3860_c4dbgpf("append keyval: id={} key='{}' val='{}'", nid, m_tree->key(nid), m_tree->val(nid));3861if( ! m_key_tag.empty())3862{3863_c4dbgpf("append keyval[{}]: set key tag='{}' -> '{}'", nid, m_key_tag, normalize_tag(m_key_tag));3864m_tree->set_key_tag(nid, normalize_tag(m_key_tag));3865m_key_tag.clear();3866}3867if( ! 
m_val_tag.empty())3868{3869_c4dbgpf("append keyval[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag));3870m_tree->set_val_tag(nid, normalize_tag(m_val_tag));3871m_val_tag.clear();3872}3873_write_key_anchor(nid);3874_write_val_anchor(nid);3875rem_flags(QMRK);3876return m_tree->get(nid);3877}387838793880//-----------------------------------------------------------------------------3881void Parser::_store_scalar(csubstr s, flag_t is_quoted)3882{3883_c4dbgpf("state[{}]: storing scalar '{}' (flag: {}) (old scalar='{}')",3884m_state-m_stack.begin(), s, m_state->flags & SSCL, m_state->scalar);3885RYML_CHECK(has_none(SSCL));3886add_flags(SSCL | (is_quoted * QSCL));3887m_state->scalar = s;3888}38893890csubstr Parser::_consume_scalar()3891{3892_c4dbgpf("state[{}]: consuming scalar '{}' (flag: {}))", m_state-m_stack.begin(), m_state->scalar, m_state->flags & SSCL);3893RYML_CHECK(m_state->flags & SSCL);3894csubstr s = m_state->scalar;3895rem_flags(SSCL | QSCL);3896m_state->scalar.clear();3897return s;3898}38993900void Parser::_move_scalar_from_top()3901{3902if(m_stack.size() < 2) return;3903State &prev = m_stack.top(1);3904_RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top());3905_RYML_CB_ASSERT(m_stack.m_callbacks, m_state != &prev);3906if(prev.flags & SSCL)3907{3908_c4dbgpf("moving scalar '{}' from state[{}] to state[{}] (overwriting '{}')", prev.scalar, &prev-m_stack.begin(), m_state-m_stack.begin(), m_state->scalar);3909add_flags(prev.flags & (SSCL | QSCL));3910m_state->scalar = prev.scalar;3911rem_flags(SSCL | QSCL, &prev);3912prev.scalar.clear();3913}3914}39153916//-----------------------------------------------------------------------------3917/** @todo this function is a monster and needs love. Likely, it needs3918* to be split like _scan_scalar_*() */3919bool Parser::_handle_indentation()3920{3921_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));3922if( ! 
_at_line_begin())3923return false;39243925size_t ind = m_state->line_contents.indentation;3926csubstr rem = m_state->line_contents.rem;3927/** @todo instead of trimming, we should use the indentation index from above */3928csubstr remt = rem.triml(' ');39293930if(remt.empty() || remt.begins_with('#')) // this is a blank or comment line3931{3932_line_progressed(rem.size());3933return true;3934}39353936_c4dbgpf("indentation? ind={} indref={}", ind, m_state->indref);3937if(ind == m_state->indref)3938{3939_c4dbgpf("same indentation: {}", ind);3940if(!rem.sub(ind).begins_with('-'))3941{3942_c4dbgp("does not begin with -");3943if(has_any(RMAP))3944{3945if(has_all(SSCL|RVAL))3946{3947_c4dbgp("add with null val");3948_append_key_val_null(rem.str + ind - 1);3949addrem_flags(RKEY, RVAL);3950}3951}3952else if(has_any(RSEQ))3953{3954if(m_stack.size() > 2) // do not pop to root level3955{3956if(has_any(RNXT))3957{3958_c4dbgp("end the indentless seq");3959_pop_level();3960return true;3961}3962else if(has_any(RVAL))3963{3964_c4dbgp("add with null val");3965_append_val_null(rem.str);3966_c4dbgp("end the indentless seq");3967_pop_level();3968return true;3969}3970}3971}3972}3973_line_progressed(ind);3974return ind > 0;3975}3976else if(ind < m_state->indref)3977{3978_c4dbgpf("smaller indentation ({} < {})!!!", ind, m_state->indref);3979if(has_all(RVAL))3980{3981_c4dbgp("there was an empty val -- appending");3982if(has_all(RMAP))3983{3984_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL));3985_append_key_val_null(rem.sub(ind).str - 1);3986}3987else if(has_all(RSEQ))3988{3989_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL));3990_append_val_null(rem.sub(ind).str - 1);3991}3992}3993// search the stack frame to jump to based on its indentation3994State const* popto = nullptr;3995_RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.is_contiguous()); // this search relies on the stack being contiguous3996for(State const* s = m_state-1; s >= m_stack.begin(); --s)3997{3998_c4dbgpf("searching for 
state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);3999if(s->indref == ind)4000{4001_c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);4002popto = s;4003// while it may be tempting to think we're done at this4004// point, we must still determine whether we're jumping to a4005// parent with the same indentation. Consider this case with4006// an indentless sequence:4007//4008// product:4009// - sku: BL394D4010// quantity: 44011// description: Basketball4012// price: 450.004013// - sku: BL4438H4014// quantity: 14015// description: Super Hoop4016// price: 2392.00 # jumping one level here would be wrong.4017// tax: 1234.5 # we must jump two levels4018if(popto > m_stack.begin())4019{4020auto parent = popto - 1;4021if(parent->indref == popto->indref)4022{4023_c4dbgpf("the parent (level={},node={}) has the same indentation ({}). is this in an indentless sequence?", parent->level, parent->node_id, popto->indref);4024_c4dbgpf("isseq(popto)={} ismap(parent)={}", m_tree->is_seq(popto->node_id), m_tree->is_map(parent->node_id));4025if(m_tree->is_seq(popto->node_id) && m_tree->is_map(parent->node_id))4026{4027if( ! 
remt.begins_with('-'))4028{4029_c4dbgp("this is an indentless sequence");4030popto = parent;4031}4032else4033{4034_c4dbgp("not an indentless sequence");4035}4036}4037}4038}4039break;4040}4041}4042if(!popto || popto >= m_state || popto->level >= m_state->level)4043{4044_c4err("parse error: incorrect indentation?");4045}4046_c4dbgpf("popping {} levels: from level {} to level {}", m_state->level-popto->level, m_state->level, popto->level);4047while(m_state != popto)4048{4049_c4dbgpf("popping level {} (indentation={})", m_state->level, m_state->indref);4050_pop_level();4051}4052_RYML_CB_ASSERT(m_stack.m_callbacks, ind == m_state->indref);4053_line_progressed(ind);4054return true;4055}4056else4057{4058_c4dbgpf("larger indentation ({} > {})!!!", ind, m_state->indref);4059_RYML_CB_ASSERT(m_stack.m_callbacks, ind > m_state->indref);4060if(has_all(RMAP|RVAL))4061{4062if(_is_scalar_next__rmap_val(remt) && (!remt.first_of_any(": ", "? ")) && (!remt.ends_with(":")))4063{4064_c4dbgpf("actually it seems a value: '{}'", remt);4065}4066else4067{4068addrem_flags(RKEY, RVAL);4069_start_unk();4070//_move_scalar_from_top();4071_line_progressed(ind);4072_save_indentation();4073return true;4074}4075}4076else if(has_all(RSEQ|RVAL))4077{4078// nothing to do here4079}4080else4081{4082_c4err("parse error - indentation should not increase at this point");4083}4084}40854086return false;4087}40884089//-----------------------------------------------------------------------------4090csubstr Parser::_scan_comment()4091{4092csubstr s = m_state->line_contents.rem;4093_RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('#'));4094_line_progressed(s.len);4095// skip the # character4096s = s.sub(1);4097// skip leading whitespace4098s = s.right_of(s.first_not_of(' '), /*include_pos*/true);4099_c4dbgpf("comment was '{}'", s);4100return s;4101}41024103//-----------------------------------------------------------------------------4104csubstr Parser::_scan_squot_scalar()4105{4106// quoted scalars can spread 
over multiple lines!4107// nice explanation here: http://yaml-multiline.info/41084109// a span to the end of the file4110size_t b = m_state->pos.offset;4111substr s = m_buf.sub(b);4112if(s.begins_with(' '))4113{4114s = s.triml(' ');4115_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s));4116_RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());4117_line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));4118}4119b = m_state->pos.offset; // take this into account4120_RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('\''));41214122// skip the opening quote4123_line_progressed(1);4124s = s.sub(1);41254126bool needs_filter = false;41274128size_t numlines = 1; // we already have one line4129size_t pos = npos; // find the pos of the matching quote4130while( ! _finished_file())4131{4132const csubstr line = m_state->line_contents.rem;4133bool line_is_blank = true;4134_c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line);4135for(size_t i = 0; i < line.len; ++i)4136{4137const char curr = line.str[i];4138if(curr == '\'') // single quotes are escaped with two single quotes4139{4140const char next = i+1 < line.len ? 
line.str[i+1] : '~';4141if(next != '\'') // so just look for the first quote4142{ // without another after it4143pos = i;4144break;4145}4146else4147{4148needs_filter = true; // needs filter to remove escaped quotes4149++i; // skip the escaped quote4150}4151}4152else if(curr != ' ')4153{4154line_is_blank = false;4155}4156}41574158// leading whitespace also needs filtering4159needs_filter = needs_filter4160|| (numlines > 1)4161|| line_is_blank4162|| (_at_line_begin() && line.begins_with(' '));41634164if(pos == npos)4165{4166_line_progressed(line.len);4167++numlines;4168}4169else4170{4171_RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);4172_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\'');4173_line_progressed(pos + 1); // progress beyond the quote4174pos = m_state->pos.offset - b - 1; // but we stop before it4175break;4176}41774178_line_ended();4179_scan_line();4180}41814182if(pos == npos)4183{4184_c4err("reached end of file while looking for closing quote");4185}4186else4187{4188_RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0);4189_RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());4190_RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');4191s = s.sub(0, pos-1);4192}41934194if(needs_filter)4195{4196csubstr ret = _filter_squot_scalar(s);4197_RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty());4198_c4dbgpf("final scalar: \"{}\"", ret);4199return ret;4200}42014202_c4dbgpf("final scalar: \"{}\"", s);42034204return s;4205}42064207//-----------------------------------------------------------------------------4208csubstr Parser::_scan_dquot_scalar()4209{4210// quoted scalars can spread over multiple lines!4211// nice explanation here: http://yaml-multiline.info/42124213// a span to the end of the file4214size_t b = m_state->pos.offset;4215substr s = m_buf.sub(b);4216if(s.begins_with(' '))4217{4218s = s.triml(' 
');4219_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s));4220_RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());4221_line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));4222}4223b = m_state->pos.offset; // take this into account4224_RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('"'));42254226// skip the opening quote4227_line_progressed(1);4228s = s.sub(1);42294230bool needs_filter = false;42314232size_t numlines = 1; // we already have one line4233size_t pos = npos; // find the pos of the matching quote4234while( ! _finished_file())4235{4236const csubstr line = m_state->line_contents.rem;4237bool line_is_blank = true;4238_c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_state->pos.line, line);4239for(size_t i = 0; i < line.len; ++i)4240{4241const char curr = line.str[i];4242if(curr != ' ')4243line_is_blank = false;4244// every \ is an escape4245if(curr == '\\')4246{4247const char next = i+1 < line.len ? line.str[i+1] : '~';4248needs_filter = true;4249if(next == '"' || next == '\\')4250++i;4251}4252else if(curr == '"')4253{4254pos = i;4255break;4256}4257}42584259// leading whitespace also needs filtering4260needs_filter = needs_filter4261|| (numlines > 1)4262|| line_is_blank4263|| (_at_line_begin() && line.begins_with(' '));42644265if(pos == npos)4266{4267_line_progressed(line.len);4268++numlines;4269}4270else4271{4272_RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);4273_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"');4274_line_progressed(pos + 1); // progress beyond the quote4275pos = m_state->pos.offset - b - 1; // but we stop before it4276break;4277}42784279_line_ended();4280_scan_line();4281}42824283if(pos == npos)4284{4285_c4err("reached end of file looking for closing quote");4286}4287else4288{4289_RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0);4290_RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == 
'"');4291_RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());4292s = s.sub(0, pos-1);4293}42944295if(needs_filter)4296{4297csubstr ret = _filter_dquot_scalar(s);4298_c4dbgpf("final scalar: [{}]\"{}\"", ret.len, ret);4299_RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty());4300return ret;4301}43024303_c4dbgpf("final scalar: \"{}\"", s);43044305return s;4306}43074308//-----------------------------------------------------------------------------4309csubstr Parser::_scan_block()4310{4311// nice explanation here: http://yaml-multiline.info/4312csubstr s = m_state->line_contents.rem;4313csubstr trimmed = s.triml(' ');4314if(trimmed.str > s.str)4315{4316_c4dbgp("skipping whitespace");4317_RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= s.str);4318_line_progressed(static_cast<size_t>(trimmed.str - s.str));4319s = trimmed;4320}4321_RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));43224323_c4dbgpf("scanning block: specs=\"{}\"", s);43244325// parse the spec4326BlockStyle_e newline = s.begins_with('>') ? BLOCK_FOLD : BLOCK_LITERAL;4327BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used4328size_t indentation = npos; // have to find out if no spec is given4329csubstr digits;4330if(s.len > 1)4331{4332_RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with_any("|>"));4333csubstr t = s.sub(1);4334_c4dbgpf("scanning block: spec is multichar: '{}'", t);4335_RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1);4336size_t pos = t.first_of("-+");4337_c4dbgpf("scanning block: spec chomp char at {}", pos);4338if(pos != npos)4339{4340if(t[pos] == '-')4341chomp = CHOMP_STRIP;4342else if(t[pos] == '+')4343chomp = CHOMP_KEEP;4344if(pos == 0)4345t = t.sub(1);4346else4347t = t.first(pos);4348}4349// from here to the end, only digits are considered4350digits = t.left_of(t.first_not_of("0123456789"));4351if( ! digits.empty())4352{4353if( ! 
c4::atou(digits, &indentation))4354_c4err("parse error: could not read decimal");4355_c4dbgpf("scanning block: indentation specified: {}. add {} from curr state -> {}", indentation, m_state->indref, indentation+m_state->indref);4356indentation += m_state->indref;4357}4358}43594360// finish the current line4361_line_progressed(s.len);4362_line_ended();4363_scan_line();43644365_c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);43664367// start with a zero-length block, already pointing at the right place4368substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0);4369_RYML_CB_ASSERT(m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin());43704371// read every full line into a raw block,4372// from which newlines are to be stripped as needed.4373//4374// If no explicit indentation was given, pick it from the first4375// non-empty line. See4376// https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator4377size_t num_lines = 0, first = m_state->pos.line, provisional_indentation = npos;4378LineContents lc;4379while(( ! _finished_file()))4380{4381// peek next line, but do not advance immediately4382lc.reset_with_next_line(m_buf, m_state->pos.offset);4383_c4dbgpf("scanning block: peeking at '{}'", lc.stripped);4384// evaluate termination conditions4385if(indentation != npos)4386{4387// stop when the line is deindented and not empty4388if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))4389{4390if(raw_block.len)4391{4392_c4dbgpf("scanning block: indentation decreased ref={} thisline={}", indentation, lc.indentation);4393}4394else4395{4396_c4err("indentation decreased without any scalar");4397}4398break;4399}4400else if(indentation == 0)4401{4402if((lc.rem == "..." || lc.rem.begins_with("... 
"))4403||4404(lc.rem == "---" || lc.rem.begins_with("--- ")))4405{4406_c4dbgp("scanning block: stop. indentation=0 and stream ended");4407break;4408}4409}4410}4411else4412{4413_c4dbgpf("scanning block: indentation ref not set. firstnonws={}", lc.stripped.first_not_of(' '));4414if(lc.stripped.first_not_of(' ') != npos) // non-empty line4415{4416_c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation);4417if(provisional_indentation == npos)4418{4419if(lc.indentation < m_state->indref)4420{4421_c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref);4422if(raw_block.len == 0)4423{4424_c4dbgp("scanning block: was empty, undo next line");4425_line_ended_undo();4426}4427break;4428}4429else if(lc.indentation == m_state->indref)4430{4431if(has_any(RSEQ|RMAP))4432{4433_c4dbgpf("scanning block: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_state->indref);4434break;4435}4436}4437_c4dbgpf("scanning block: set indentation ref from this line: ref={}", lc.indentation);4438indentation = lc.indentation;4439}4440else4441{4442if(lc.indentation >= provisional_indentation)4443{4444_c4dbgpf("scanning block: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);4445//indentation = provisional_indentation ? provisional_indentation : lc.indentation;4446indentation = lc.indentation;4447}4448else4449{4450break;4451//_c4err("parse error: first non-empty block line should have at least the original indentation");4452}4453}4454}4455else // empty line4456{4457_c4dbgpf("scanning block: line empty or {} spaces. 
line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);4458if(provisional_indentation != npos)4459{4460if(lc.stripped.len >= provisional_indentation)4461{4462_c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);4463provisional_indentation = lc.stripped.len;4464}4465#ifdef RYML_NO_COVERAGE__TO_BE_DELETED4466else if(lc.indentation >= provisional_indentation && lc.indentation != npos)4467{4468_c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);4469provisional_indentation = lc.indentation;4470}4471#endif4472}4473else4474{4475provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);4476_c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation);4477if(provisional_indentation == npos)4478{4479provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);4480_c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation);4481}4482}4483}4484}4485// advance now that we know the folded scalar continues4486m_state->line_contents = lc;4487_c4dbgpf("scanning block: append '{}'", m_state->line_contents.rem);4488raw_block.len += m_state->line_contents.full.len;4489_line_progressed(m_state->line_contents.rem.len);4490_line_ended();4491++num_lines;4492}4493_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0));4494C4_UNUSED(num_lines);4495C4_UNUSED(first);44964497if(indentation == npos)4498{4499_c4dbgpf("scanning block: set indentation from provisional: {}", provisional_indentation);4500indentation = provisional_indentation;4501}45024503if(num_lines)4504_line_ended_undo();45054506_c4dbgpf("scanning block: raw=~~~{}~~~", raw_block);45074508// ok! 
now we strip the newlines and spaces according to the specs4509s = _filter_block_scalar(raw_block, newline, chomp, indentation);45104511_c4dbgpf("scanning block: final=~~~{}~~~", s);45124513return s;4514}451545164517//-----------------------------------------------------------------------------45184519template<bool backslash_is_escape, bool keep_trailing_whitespace>4520bool Parser::_filter_nl(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos, size_t indentation)4521{4522// a debugging scaffold:4523#if 04524#define _c4dbgfnl(fmt, ...) _c4dbgpf("filter_nl[{}]: " fmt, *i, __VA_ARGS__)4525#else4526#define _c4dbgfnl(...)4527#endif45284529const char curr = r[*i];4530bool replaced = false;45314532_RYML_CB_ASSERT(m_stack.m_callbacks, indentation != npos);4533_RYML_CB_ASSERT(m_stack.m_callbacks, curr == '\n');45344535_c4dbgfnl("found newline. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos));4536size_t ii = *i;4537size_t numnl_following = count_following_newlines(r, &ii, indentation);4538if(numnl_following)4539{4540_c4dbgfnl("{} consecutive (empty) lines {} in the middle. totalws={}", 1+numnl_following, ii < r.len ? "in the middle" : "at the end", ii - *i);4541for(size_t j = 0; j < numnl_following; ++j)4542m_filter_arena.str[(*pos)++] = '\n';4543}4544else4545{4546if(r.first_not_of(" \t", *i+1) != npos)4547{4548m_filter_arena.str[(*pos)++] = ' ';4549_c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos));4550replaced = true;4551}4552else4553{4554if C4_IF_CONSTEXPR (keep_trailing_whitespace)4555{4556m_filter_arena.str[(*pos)++] = ' ';4557_c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos));4558replaced = true;4559}4560else4561{4562_c4dbgfnl("last newline, everything else is whitespace. 
ii={}/{}", ii, r.len);4563*i = r.len;4564}4565}4566if C4_IF_CONSTEXPR (backslash_is_escape)4567{4568if(ii < r.len && r.str[ii] == '\\')4569{4570const char next = ii+1 < r.len ? r.str[ii+1] : '\0';4571if(next == ' ' || next == '\t')4572{4573_c4dbgfnl("extend skip to backslash{}", "");4574++ii;4575}4576}4577}4578}4579*i = ii - 1; // correct for the loop increment45804581#undef _c4dbgfnl45824583return replaced;4584}458545864587//-----------------------------------------------------------------------------45884589template<bool keep_trailing_whitespace>4590void Parser::_filter_ws(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos)4591{4592// a debugging scaffold:4593#if 04594#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_nl[{}]: " fmt, *i, __VA_ARGS__)4595#else4596#define _c4dbgfws(...)4597#endif45984599const char curr = r[*i];4600_c4dbgfws("found whitespace '{}'", _c4prc(curr));4601_RYML_CB_ASSERT(m_stack.m_callbacks, curr == ' ' || curr == '\t');46024603size_t first = *i > 0 ? r.first_not_of(" \t", *i) : r.first_not_of(' ', *i);4604if(first != npos)4605{4606if(r[first] == '\n' || r[first] == '\r') // skip trailing whitespace4607{4608_c4dbgfws("whitespace is trailing on line. firstnonws='{}'@{}", _c4prc(r[first]), first);4609*i = first - 1; // correct for the loop increment4610}4611else // a legit whitespace4612{4613m_filter_arena.str[(*pos)++] = curr;4614_c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos));4615}4616}4617else4618{4619_c4dbgfws("... everything else is trailing whitespace{}", "");4620if C4_IF_CONSTEXPR (keep_trailing_whitespace)4621for(size_t j = *i; j < r.len; ++j)4622m_filter_arena.str[(*pos)++] = r[j];4623*i = r.len;4624}46254626#undef _c4dbgfws4627}462846294630//-----------------------------------------------------------------------------4631csubstr Parser::_filter_plain_scalar(substr s, size_t indentation)4632{4633// a debugging scaffold:4634#if 04635#define _c4dbgfps(...) 
_c4dbgpf("filt_plain_scalar" __VA_ARGS__)4636#else4637#define _c4dbgfps(...)4638#endif46394640_c4dbgfps("before=~~~{}~~~", s);46414642substr r = s.triml(" \t");4643_grow_filter_arena(r.len);4644size_t pos = 0; // the filtered size4645bool filtered_chars = false;4646for(size_t i = 0; i < r.len; ++i)4647{4648const char curr = r.str[i];4649_c4dbgfps("[{}]: '{}'", i, _c4prc(curr));4650if(curr == ' ' || curr == '\t')4651{4652_filter_ws</*keep_trailing_ws*/false>(r, &i, &pos);4653}4654else if(curr == '\n')4655{4656filtered_chars = _filter_nl</*backslash_is_escape*/false, /*keep_trailing_ws*/false>(r, &i, &pos, indentation);4657}4658else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/18859004659{4660;4661}4662else4663{4664m_filter_arena.str[pos++] = r[i];4665}4666}46674668_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);4669if(pos < r.len || filtered_chars)4670{4671r = _finish_filter_arena(r, pos);4672}46734674_RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len);4675_c4dbgfps("#filteredchars={} after=~~~{}~~~", s.len - r.len, r);46764677#undef _c4dbgfps4678return r;4679}468046814682//-----------------------------------------------------------------------------4683csubstr Parser::_filter_squot_scalar(substr s)4684{4685// a debugging scaffold:4686#if 04687#define _c4dbgfsq(...) 
_c4dbgpf("filt_squo_scalar")4688#else4689#define _c4dbgfsq(...)4690#endif46914692// from the YAML spec for double-quoted scalars:4693// https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted46944695_c4dbgfsq(": before=~~~{}~~~", s);46964697_grow_filter_arena(s.len);4698substr r = s;4699size_t pos = 0; // the filtered size4700bool filtered_chars = false;4701for(size_t i = 0; i < r.len; ++i)4702{4703const char curr = r[i];4704_c4dbgfsq("[{}]: '{}'", i, _c4prc(curr));4705if(curr == ' ' || curr == '\t')4706{4707_filter_ws</*keep_trailing_ws*/true>(r, &i, &pos);4708}4709else if(curr == '\n')4710{4711filtered_chars = _filter_nl</*backslash_is_escape*/false, /*keep_trailing_ws*/true>(r, &i, &pos, /*indentation*/0);4712}4713else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/18859004714{4715;4716}4717else if(curr == '\'')4718{4719char next = i+1 < r.len ? r[i+1] : '\0';4720if(next == '\'')4721{4722_c4dbgfsq("[{}]: two consecutive quotes", i);4723filtered_chars = true;4724m_filter_arena.str[pos++] = '\'';4725++i;4726}4727}4728else4729{4730m_filter_arena.str[pos++] = curr;4731}4732}47334734_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);4735if(pos < r.len || filtered_chars)4736{4737r = _finish_filter_arena(r, pos);4738}47394740_RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len);4741_c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r);47424743#undef _c4dbgfsq4744return r;4745}474647474748//-----------------------------------------------------------------------------4749csubstr Parser::_filter_dquot_scalar(substr s)4750{4751// a debugging scaffold:4752#if 04753#define _c4dbgfdq(...) 
_c4dbgpf("filt_dquo_scalar" __VA_ARGS__)4754#else4755#define _c4dbgfdq(...)4756#endif47574758_c4dbgfdq(": before=~~~{}~~~", s);47594760// from the YAML spec for double-quoted scalars:4761// https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted4762//4763// All leading and trailing white space characters are excluded4764// from the content. Each continuation line must therefore contain4765// at least one non-space character. Empty lines, if any, are4766// consumed as part of the line folding.47674768_grow_filter_arena(s.len + 2u * s.count('\\'));4769substr r = s;4770size_t pos = 0; // the filtered size4771bool filtered_chars = false;4772for(size_t i = 0; i < r.len; ++i)4773{4774const char curr = r[i];4775_c4dbgfdq("[{}]: '{}'", i, _c4prc(curr));4776if(curr == ' ' || curr == '\t')4777{4778_filter_ws</*keep_trailing_ws*/true>(r, &i, &pos);4779}4780else if(curr == '\n')4781{4782filtered_chars = _filter_nl</*backslash_is_escape*/true, /*keep_trailing_ws*/true>(r, &i, &pos, /*indentation*/0);4783}4784else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/18859004785{4786;4787}4788else if(curr == '\\')4789{4790char next = i+1 < r.len ? 
r[i+1] : '\0';4791_c4dbgfdq("[{}]: backslash, next='{}'", i, _c4prc(next));4792filtered_chars = true;4793if(next == '\r')4794{4795if(i+2 < r.len && r[i+2] == '\n')4796{4797++i; // newline escaped with \ -- skip both (add only one as i is loop-incremented)4798next = '\n';4799_c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", i);4800}4801}4802// remember the loop will also increment i4803if(next == '\n')4804{4805size_t ii = i + 2;4806for( ; ii < r.len; ++ii)4807{4808if(r.str[ii] == ' ' || r.str[ii] == '\t') // skip leading whitespace4809;4810else4811break;4812}4813i += ii - i - 1;4814}4815else if(next == '"' || next == '/' || next == ' ' || next == '\t') // escapes for json compatibility4816{4817m_filter_arena.str[pos++] = next;4818++i;4819}4820else if(next == '\r')4821{4822//++i;4823}4824else if(next == 'n')4825{4826m_filter_arena.str[pos++] = '\n';4827++i;4828}4829else if(next == 'r')4830{4831m_filter_arena.str[pos++] = '\r';4832++i; // skip4833}4834else if(next == 't')4835{4836m_filter_arena.str[pos++] = '\t';4837++i;4838}4839else if(next == '\\')4840{4841m_filter_arena.str[pos++] = '\\';4842++i;4843}4844else if(next == 'x') // UTF84845{4846if(i + 1u + 2u >= r.len)4847_c4err("\\x requires 2 hex digits");4848uint8_t byteval = {};4849if(!read_hex(r.sub(i + 2u, 2u), &byteval))4850_c4err("failed to read \\x codepoint");4851m_filter_arena.str[pos++] = *(char*)&byteval;4852i += 1u + 2u;4853}4854else if(next == 'u') // UTF164855{4856if(i + 1u + 4u >= r.len)4857_c4err("\\u requires 4 hex digits");4858char readbuf[8];4859csubstr codepoint = r.sub(i + 2u, 4u);4860uint32_t codepoint_val = {};4861if(!read_hex(codepoint, &codepoint_val))4862_c4err("failed to parse \\u codepoint");4863size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);4864C4_ASSERT(numbytes <= 4);4865memcpy(m_filter_arena.str + pos, readbuf, numbytes);4866pos += numbytes;4867i += 1u + 4u;4868}4869else if(next == 'U') // UTF324870{4871if(i + 1u + 8u >= r.len)4872_c4err("\\U 
requires 8 hex digits");4873char readbuf[8];4874csubstr codepoint = r.sub(i + 2u, 8u);4875uint32_t codepoint_val = {};4876if(!read_hex(codepoint, &codepoint_val))4877_c4err("failed to parse \\U codepoint");4878size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);4879C4_ASSERT(numbytes <= 4);4880memcpy(m_filter_arena.str + pos, readbuf, numbytes);4881pos += numbytes;4882i += 1u + 8u;4883}4884// https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char4885else if(next == '0')4886{4887m_filter_arena.str[pos++] = '\0';4888++i;4889}4890else if(next == 'b') // backspace4891{4892m_filter_arena.str[pos++] = '\b';4893++i;4894}4895else if(next == 'f') // form feed4896{4897m_filter_arena.str[pos++] = '\f';4898++i;4899}4900else if(next == 'a') // bell character4901{4902m_filter_arena.str[pos++] = '\a';4903++i;4904}4905else if(next == 'v') // vertical tab4906{4907m_filter_arena.str[pos++] = '\v';4908++i;4909}4910else if(next == 'e') // escape character4911{4912m_filter_arena.str[pos++] = '\x1b';4913++i;4914}4915else if(next == '_') // unicode non breaking space \u00a04916{4917// https://www.compart.com/en/unicode/U+00a04918m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2);4919m_filter_arena.str[pos++] = _RYML_CHCONST(-0x60, 0xa0);4920++i;4921}4922else if(next == 'N') // unicode next line \u00854923{4924// https://www.compart.com/en/unicode/U+00854925m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2);4926m_filter_arena.str[pos++] = _RYML_CHCONST(-0x7b, 0x85);4927++i;4928}4929else if(next == 'L') // unicode line separator \u20284930{4931// https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex4932m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2);4933m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80);4934m_filter_arena.str[pos++] = _RYML_CHCONST(-0x58, 0xa8);4935++i;4936}4937else if(next == 'P') // unicode paragraph separator \u20294938{4939// 
https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex4940m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2);4941m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80);4942m_filter_arena.str[pos++] = _RYML_CHCONST(-0x57, 0xa9);4943++i;4944}4945_c4dbgfdq("[{}]: backslash...sofar=[{}]~~~{}~~~", i, pos, m_filter_arena.first(pos));4946}4947else4948{4949m_filter_arena.str[pos++] = curr;4950}4951}49524953_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);4954if(pos < r.len || filtered_chars)4955{4956r = _finish_filter_arena(r, pos);4957}49584959_RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len);4960_c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r);49614962#undef _c4dbgfdq49634964return r;4965}496649674968//-----------------------------------------------------------------------------4969bool Parser::_apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp)4970{4971substr trimmed = buf.first(*pos).trimr('\n');4972bool added_newline = false;4973switch(chomp)4974{4975case CHOMP_KEEP:4976if(trimmed.len == *pos)4977{4978_c4dbgpf("chomp=KEEP: add missing newline @{}", *pos);4979//m_filter_arena.str[(*pos)++] = '\n';4980added_newline = true;4981}4982break;4983case CHOMP_CLIP:4984if(trimmed.len == *pos)4985{4986_c4dbgpf("chomp=CLIP: add missing newline @{}", *pos);4987m_filter_arena.str[(*pos)++] = '\n';4988added_newline = true;4989}4990else4991{4992_c4dbgpf("chomp=CLIP: include single trailing newline @{}", trimmed.len+1);4993*pos = trimmed.len + 1;4994}4995break;4996case CHOMP_STRIP:4997_c4dbgpf("chomp=STRIP: strip {}-{}-{} newlines", *pos, trimmed.len, *pos-trimmed.len);4998*pos = trimmed.len;4999break;5000default:5001_c4err("unknown chomp style");5002}5003return added_newline;5004}500550065007//-----------------------------------------------------------------------------5008csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t 
indentation)5009{5010// a debugging scaffold:5011#if 05012#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block" fmt, __VA_ARGS__)5013#else5014#define _c4dbgfbl(...)5015#endif50165017_c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s);50185019if(chomp != CHOMP_KEEP && s.trim(" \n\r").len == 0u)5020{5021_c4dbgp("filt_block: empty scalar");5022return s.first(0);5023}50245025substr r = s;50265027switch(style)5028{5029case BLOCK_LITERAL:5030{5031_c4dbgp("filt_block: style=literal");5032// trim leading whitespace up to indentation5033{5034size_t numws = r.first_not_of(' ');5035if(numws != npos)5036{5037if(numws > indentation)5038r = r.sub(indentation);5039else5040r = r.sub(numws);5041_c4dbgfbl(": after triml=[{}]~~~{}~~~", r.len, r);5042}5043else5044{5045if(chomp != CHOMP_KEEP || r.len == 0)5046{5047_c4dbgfbl(": all spaces {}, return empty", r.len);5048return r.first(0);5049}5050else5051{5052r[0] = '\n';5053return r.first(1);5054}5055}5056}5057_grow_filter_arena(s.len + 2u); // use s.len! 
because we may need to add a newline at the end, so the leading indentation will allow space for that newline5058size_t pos = 0; // the filtered size5059for(size_t i = 0; i < r.len; ++i)5060{5061const char curr = r.str[i];5062_c4dbgfbl("[{}]='{}' pos={}", i, _c4prc(curr), pos);5063if(curr == '\r')5064continue;5065m_filter_arena.str[pos++] = curr;5066if(curr == '\n')5067{5068_c4dbgfbl("[{}]: found newline", i);5069// skip indentation on the next line5070csubstr rem = r.sub(i+1);5071size_t first = rem.first_not_of(' ');5072if(first != npos)5073{5074_RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len);5075_RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len);5076_c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, rem.str[first]);5077if(first < indentation)5078{5079_c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation);5080i += first;5081}5082else5083{5084_c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);5085i += indentation;5086}5087}5088else5089{5090_RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len);5091first = rem.len;5092_c4dbgfbl("[{}]: {} spaces to the end", i, first);5093if(first)5094{5095if(first < indentation)5096{5097_c4dbgfbl("[{}]: skip everything", i);5098--pos;5099break;5100}5101else5102{5103_c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);5104i += indentation;5105}5106}5107else if(i+1 == r.len)5108{5109if(chomp == CHOMP_STRIP)5110--pos;5111break;5112}5113}5114}5115}5116_RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= pos);5117_c4dbgfbl(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r);5118bool changed = _apply_chomp(m_filter_arena, &pos, chomp);5119_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);5120_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= s.len);5121if(pos < r.len || changed)5122{5123r = _finish_filter_arena(s, pos); // write into s5124}5125break;5126}5127case BLOCK_FOLD:5128{5129_c4dbgp("filt_block: 
style=fold");5130_grow_filter_arena(r.len + 2);5131size_t pos = 0; // the filtered size5132bool filtered_chars = false;5133bool started = false;5134bool is_indented = false;5135size_t i = r.first_not_of(' ');5136_c4dbgfbl(": first non space at {}", i);5137if(i > indentation)5138{5139is_indented = true;5140i = indentation;5141}5142_c4dbgfbl(": start folding at {}, is_indented={}", i, (int)is_indented);5143auto on_change_indentation = [&](size_t numnl_following, size_t last_newl, size_t first_non_whitespace){5144_c4dbgfbl("[{}]: add 1+{} newlines", i, numnl_following);5145for(size_t j = 0; j < 1 + numnl_following; ++j)5146m_filter_arena.str[pos++] = '\n';5147for(i = last_newl + 1 + indentation; i < first_non_whitespace; ++i)5148{5149if(r.str[i] == '\r')5150continue;5151_c4dbgfbl("[{}]: add '{}'", i, _c4prc(r.str[i]));5152m_filter_arena.str[pos++] = r.str[i];5153}5154--i;5155};5156for( ; i < r.len; ++i)5157{5158const char curr = r.str[i];5159_c4dbgfbl("[{}]='{}'", i, _c4prc(curr));5160if(curr == '\n')5161{5162filtered_chars = true;5163// skip indentation on the next line, and advance over the next non-indented blank lines as well5164size_t first_non_whitespace;5165size_t numnl_following = (size_t)-1;5166while(r[i] == '\n')5167{5168++numnl_following;5169csubstr rem = r.sub(i+1);5170size_t first = rem.first_not_of(' ');5171_c4dbgfbl("[{}]: found newline. 
first={} rem.len={}", i, first, rem.len);5172if(first != npos)5173{5174first_non_whitespace = first + i+1;5175while(first_non_whitespace < r.len && r[first_non_whitespace] == '\r')5176++first_non_whitespace;5177_RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len);5178_RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len);5179_c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, _c4prc(rem.str[first]));5180if(first < indentation)5181{5182_c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation);5183i += first;5184}5185else5186{5187_c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);5188i += indentation;5189if(first > indentation)5190{5191_c4dbgfbl("[{}]: {} further indented than {}, stop newlining", i, first, indentation);5192goto finished_counting_newlines;5193}5194}5195// prepare the next while loop iteration5196// by setting i at the next newline after5197// an empty line5198if(r[first_non_whitespace] == '\n')5199i = first_non_whitespace;5200else5201goto finished_counting_newlines;5202}5203else5204{5205_RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len);5206first = rem.len;5207first_non_whitespace = first + i+1;5208if(first)5209{5210_c4dbgfbl("[{}]: {} spaces to the end", i, first);5211if(first < indentation)5212{5213_c4dbgfbl("[{}]: skip everything", i);5214i += first;5215}5216else5217{5218_c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);5219i += indentation;5220if(first > indentation)5221{5222_c4dbgfbl("[{}]: {} spaces missing. not done yet", i, indentation - first);5223goto finished_counting_newlines;5224}5225}5226}5227else // if(i+1 == r.len)5228{5229_c4dbgfbl("[{}]: it's the final newline", i);5230_RYML_CB_ASSERT(m_stack.m_callbacks, i+1 == r.len);5231_RYML_CB_ASSERT(m_stack.m_callbacks, rem.len == 0);5232}5233goto end_of_scalar;5234}5235}5236end_of_scalar:5237// Write all the trailing newlines. 
Since we're5238// at the end no folding is needed, so write every5239// newline (add 1).5240_c4dbgfbl("[{}]: add {} trailing newlines", i, 1+numnl_following);5241for(size_t j = 0; j < 1 + numnl_following; ++j)5242m_filter_arena.str[pos++] = '\n';5243break;5244finished_counting_newlines:5245_c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace);5246while(first_non_whitespace < r.len && r[first_non_whitespace] == '\t')5247++first_non_whitespace;5248_c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace);5249_RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace <= r.len);5250size_t last_newl = r.last_of('\n', first_non_whitespace);5251size_t this_indentation = first_non_whitespace - last_newl - 1;5252_c4dbgfbl("[{}]: #newlines={} firstnonws={} lastnewl={} this_indentation={} vs indentation={}", i, numnl_following, first_non_whitespace, last_newl, this_indentation, indentation);5253_RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace >= last_newl + 1);5254_RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation >= indentation);5255if(!started)5256{5257_c4dbgfbl("[{}]: #newlines={}. 
write all leading newlines", i, numnl_following);5258for(size_t j = 0; j < 1 + numnl_following; ++j)5259m_filter_arena.str[pos++] = '\n';5260if(this_indentation > indentation)5261{5262is_indented = true;5263_c4dbgfbl("[{}]: advance ->{}", i, last_newl + indentation);5264i = last_newl + indentation;5265}5266else5267{5268i = first_non_whitespace - 1;5269_c4dbgfbl("[{}]: advance ->{}", i, first_non_whitespace);5270}5271}5272else if(this_indentation == indentation)5273{5274_c4dbgfbl("[{}]: same indentation", i);5275if(!is_indented)5276{5277if(numnl_following == 0)5278{5279_c4dbgfbl("[{}]: fold!", i);5280m_filter_arena.str[pos++] = ' ';5281}5282else5283{5284_c4dbgfbl("[{}]: add {} newlines", i, 1 + numnl_following);5285for(size_t j = 0; j < numnl_following; ++j)5286m_filter_arena.str[pos++] = '\n';5287}5288i = first_non_whitespace - 1;5289_c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace);5290}5291else5292{5293_c4dbgfbl("[{}]: back to ref indentation", i);5294is_indented = false;5295on_change_indentation(numnl_following, last_newl, first_non_whitespace);5296_c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace);5297}5298}5299else5300{5301_c4dbgfbl("[{}]: increased indentation.", i);5302is_indented = true;5303_RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation > indentation);5304on_change_indentation(numnl_following, last_newl, first_non_whitespace);5305_c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace);5306}5307}5308else if(curr != '\r')5309{5310if(curr != '\t')5311started = true;5312m_filter_arena.str[pos++] = curr;5313}5314}5315_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);5316_c4dbgfbl(": #filteredchars={} after=[{}]~~~{}~~~", (int)s.len - (int)pos, pos, m_filter_arena.first(pos));5317bool changed = _apply_chomp(m_filter_arena, &pos, chomp);5318if(pos < r.len || filtered_chars || changed)5319{5320r = _finish_filter_arena(s, pos); // write into s5321}5322}5323break;5324default:5325_c4err("unknown block 
style");5326}53275328_c4dbgfbl(": final=[{}]~~~{}~~~", r.len, r);53295330#undef _c4dbgfbl53315332return r;5333}53345335//-----------------------------------------------------------------------------5336size_t Parser::_count_nlines(csubstr src)5337{5338return 1 + src.count('\n');5339}53405341//-----------------------------------------------------------------------------5342void Parser::_handle_directive(csubstr directive_)5343{5344csubstr directive = directive_;5345if(directive.begins_with("%TAG"))5346{5347TagDirective td;5348_c4dbgpf("%TAG directive: {}", directive_);5349directive = directive.sub(4);5350if(!directive.begins_with(' '))5351_c4err("malformed tag directive: {}", directive_);5352directive = directive.triml(' ');5353size_t pos = directive.find(' ');5354if(pos == npos)5355_c4err("malformed tag directive: {}", directive_);5356td.handle = directive.first(pos);5357directive = directive.sub(td.handle.len).triml(' ');5358pos = directive.find(' ');5359if(pos != npos)5360directive = directive.first(pos);5361td.prefix = directive;5362td.next_node_id = m_tree->size();5363if(m_tree->size() > 0)5364{5365size_t prev = m_tree->size() - 1;5366if(m_tree->is_root(prev) && m_tree->type(prev) != NOTYPE && !m_tree->is_stream(prev))5367++td.next_node_id;5368}5369_c4dbgpf("%TAG: handle={} prefix={} next_node={}", td.handle, td.prefix, td.next_node_id);5370m_tree->add_tag_directive(td);5371}5372else if(directive.begins_with("%YAML"))5373{5374_c4dbgpf("%YAML directive! 
ignoring...: {}", directive);5375}5376}53775378//-----------------------------------------------------------------------------5379void Parser::set_flags(flag_t f, State * s)5380{5381#ifdef RYML_DBG5382char buf1_[64], buf2_[64];5383csubstr buf1 = _prfl(buf1_, f);5384csubstr buf2 = _prfl(buf2_, s->flags);5385_c4dbgpf("state[{}]: setting flags to {}: before={}", s-m_stack.begin(), buf1, buf2);5386#endif5387s->flags = f;5388}53895390void Parser::add_flags(flag_t on, State * s)5391{5392#ifdef RYML_DBG5393char buf1_[64], buf2_[64], buf3_[64];5394csubstr buf1 = _prfl(buf1_, on);5395csubstr buf2 = _prfl(buf2_, s->flags);5396csubstr buf3 = _prfl(buf3_, s->flags|on);5397_c4dbgpf("state[{}]: adding flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3);5398#endif5399s->flags |= on;5400}54015402void Parser::addrem_flags(flag_t on, flag_t off, State * s)5403{5404#ifdef RYML_DBG5405char buf1_[64], buf2_[64], buf3_[64], buf4_[64];5406csubstr buf1 = _prfl(buf1_, on);5407csubstr buf2 = _prfl(buf2_, off);5408csubstr buf3 = _prfl(buf3_, s->flags);5409csubstr buf4 = _prfl(buf4_, ((s->flags|on)&(~off)));5410_c4dbgpf("state[{}]: adding flags {} / removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3, buf4);5411#endif5412s->flags |= on;5413s->flags &= ~off;5414}54155416void Parser::rem_flags(flag_t off, State * s)5417{5418#ifdef RYML_DBG5419char buf1_[64], buf2_[64], buf3_[64];5420csubstr buf1 = _prfl(buf1_, off);5421csubstr buf2 = _prfl(buf2_, s->flags);5422csubstr buf3 = _prfl(buf3_, s->flags&(~off));5423_c4dbgpf("state[{}]: removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3);5424#endif5425s->flags &= ~off;5426}54275428//-----------------------------------------------------------------------------54295430csubstr Parser::_prfl(substr buf, flag_t flags)5431{5432size_t pos = 0;5433bool gotone = false;54345435#define _prflag(fl) \5436if((flags & fl) == (fl)) \5437{ \5438if(gotone) \5439{ \5440if(pos + 1 < buf.len) \5441buf[pos] = 
'|'; \5442++pos; \5443} \5444csubstr fltxt = #fl; \5445if(pos + fltxt.len <= buf.len) \5446memcpy(buf.str + pos, fltxt.str, fltxt.len); \5447pos += fltxt.len; \5448gotone = true; \5449}54505451_prflag(RTOP);5452_prflag(RUNK);5453_prflag(RMAP);5454_prflag(RSEQ);5455_prflag(FLOW);5456_prflag(QMRK);5457_prflag(RKEY);5458_prflag(RVAL);5459_prflag(RNXT);5460_prflag(SSCL);5461_prflag(QSCL);5462_prflag(RSET);5463_prflag(NDOC);5464_prflag(RSEQIMAP);54655466#undef _prflag54675468RYML_ASSERT(pos <= buf.len);54695470return buf.first(pos);5471}547254735474//-----------------------------------------------------------------------------5475//-----------------------------------------------------------------------------5476//-----------------------------------------------------------------------------54775478void Parser::_grow_filter_arena(size_t num_characters_needed)5479{5480_c4dbgpf("grow: arena={} numchars={}", m_filter_arena.len, num_characters_needed);5481if(num_characters_needed <= m_filter_arena.len)5482return;5483size_t sz = m_filter_arena.len << 1;5484_c4dbgpf("grow: sz={}", sz);5485sz = num_characters_needed > sz ? num_characters_needed : sz;5486_c4dbgpf("grow: sz={}", sz);5487sz = sz < 128u ? 
128u : sz;5488_c4dbgpf("grow: sz={}", sz);5489_RYML_CB_ASSERT(m_stack.m_callbacks, sz >= num_characters_needed);5490_resize_filter_arena(sz);5491}54925493void Parser::_resize_filter_arena(size_t num_characters)5494{5495if(num_characters > m_filter_arena.len)5496{5497_c4dbgpf("resize: sz={}", num_characters);5498char *prev = m_filter_arena.str;5499if(m_filter_arena.str)5500{5501_RYML_CB_ASSERT(m_stack.m_callbacks, m_filter_arena.len > 0);5502_RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len);5503}5504m_filter_arena.str = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, char, num_characters, prev);5505m_filter_arena.len = num_characters;5506}5507}55085509substr Parser::_finish_filter_arena(substr dst, size_t pos)5510{5511_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);5512_RYML_CB_ASSERT(m_stack.m_callbacks, pos <= dst.len);5513memcpy(dst.str, m_filter_arena.str, pos);5514return dst.first(pos);5515}551655175518//-----------------------------------------------------------------------------5519//-----------------------------------------------------------------------------5520//-----------------------------------------------------------------------------55215522csubstr Parser::location_contents(Location const& loc) const5523{5524_RYML_CB_ASSERT(m_stack.m_callbacks, loc.offset < m_buf.len);5525return m_buf.sub(loc.offset);5526}55275528Location Parser::location(ConstNodeRef node) const5529{5530_RYML_CB_ASSERT(m_stack.m_callbacks, node.valid());5531return location(*node.tree(), node.id());5532}55335534Location Parser::location(Tree const& tree, size_t node) const5535{5536// try hard to avoid getting the location from a null string.5537Location loc;5538if(_location_from_node(tree, node, &loc, 0))5539return loc;5540return val_location(m_buf.str);5541}55425543bool Parser::_location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const5544{5545if(tree.has_key(node))5546{5547csubstr k = 
tree.key(node);5548if(C4_LIKELY(k.str != nullptr))5549{5550_RYML_CB_ASSERT(m_stack.m_callbacks, k.is_sub(m_buf));5551_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(k));5552*loc = val_location(k.str);5553return true;5554}5555}55565557if(tree.has_val(node))5558{5559csubstr v = tree.val(node);5560if(C4_LIKELY(v.str != nullptr))5561{5562_RYML_CB_ASSERT(m_stack.m_callbacks, v.is_sub(m_buf));5563_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(v));5564*loc = val_location(v.str);5565return true;5566}5567}55685569if(tree.is_container(node))5570{5571if(_location_from_cont(tree, node, loc))5572return true;5573}55745575if(tree.type(node) != NOTYPE && level == 0)5576{5577// try the prev sibling5578{5579const size_t prev = tree.prev_sibling(node);5580if(prev != NONE)5581{5582if(_location_from_node(tree, prev, loc, level+1))5583return true;5584}5585}5586// try the next sibling5587{5588const size_t next = tree.next_sibling(node);5589if(next != NONE)5590{5591if(_location_from_node(tree, next, loc, level+1))5592return true;5593}5594}5595// try the parent5596{5597const size_t parent = tree.parent(node);5598if(parent != NONE)5599{5600if(_location_from_node(tree, parent, loc, level+1))5601return true;5602}5603}5604}56055606return false;5607}56085609bool Parser::_location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const5610{5611_RYML_CB_ASSERT(m_stack.m_callbacks, tree.is_container(node));5612if(!tree.is_stream(node))5613{5614const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container5615if(tree.has_children(node))5616{5617size_t child = tree.first_child(node);5618if(tree.has_key(child))5619{5620// when a map starts, the container was set after the key5621csubstr k = tree.key(child);5622if(k.str && node_start > k.str)5623node_start = k.str;5624}5625}5626*loc = val_location(node_start);5627return true;5628}5629else // it's a stream5630{5631*loc = val_location(m_buf.str); // just return the front of the 
buffer5632}5633return true;5634}563556365637Location Parser::val_location(const char *val) const5638{5639if(C4_UNLIKELY(val == nullptr))5640return {m_file, 0, 0, 0};56415642_RYML_CB_CHECK(m_stack.m_callbacks, m_options.locations());5643// NOTE: if any of these checks fails, the parser needs to be5644// instantiated with locations enabled.5645_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);5646_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);5647_RYML_CB_ASSERT(m_stack.m_callbacks, m_options.locations());5648_RYML_CB_ASSERT(m_stack.m_callbacks, !_locations_dirty());5649_RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr);5650_RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0);5651// NOTE: the pointer needs to belong to the buffer that was used to parse.5652csubstr src = m_buf;5653_RYML_CB_CHECK(m_stack.m_callbacks, val != nullptr || src.str == nullptr);5654_RYML_CB_CHECK(m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));5655// ok. 
search the first stored newline after the given ptr5656using lineptr_type = size_t const* C4_RESTRICT;5657lineptr_type lineptr = nullptr;5658size_t offset = (size_t)(val - src.begin());5659if(m_newline_offsets_size < 30) // TODO magic number5660{5661// just do a linear search if the size is small.5662for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)5663{5664if(*curr > offset)5665{5666lineptr = curr;5667break;5668}5669}5670}5671else5672{5673// do a bisection search if the size is not small.5674//5675// We could use std::lower_bound but this is simple enough and5676// spares the include of <algorithm>.5677size_t count = m_newline_offsets_size;5678size_t step;5679lineptr_type it;5680lineptr = m_newline_offsets;5681while(count)5682{5683step = count >> 1;5684it = lineptr + step;5685if(*it < offset)5686{5687lineptr = ++it;5688count -= step + 1;5689}5690else5691{5692count = step;5693}5694}5695}5696_RYML_CB_ASSERT(m_stack.m_callbacks, lineptr >= m_newline_offsets);5697_RYML_CB_ASSERT(m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);5698_RYML_CB_ASSERT(m_stack.m_callbacks, *lineptr > offset);5699Location loc;5700loc.name = m_file;5701loc.offset = offset;5702loc.line = (size_t)(lineptr - m_newline_offsets);5703if(lineptr > m_newline_offsets)5704loc.col = (offset - *(lineptr-1) - 1u);5705else5706loc.col = offset;5707return loc;5708}57095710void Parser::_prepare_locations()5711{5712m_newline_offsets_buf = m_buf;5713size_t numnewlines = 1u + m_buf.count('\n');5714_resize_locations(numnewlines);5715m_newline_offsets_size = 0;5716for(size_t i = 0; i < m_buf.len; i++)5717if(m_buf[i] == '\n')5718m_newline_offsets[m_newline_offsets_size++] = i;5719m_newline_offsets[m_newline_offsets_size++] = m_buf.len;5720_RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines);5721}57225723void Parser::_resize_locations(size_t numnewlines)5724{5725if(numnewlines > 
m_newline_offsets_capacity)5726{5727if(m_newline_offsets)5728_RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);5729m_newline_offsets = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);5730m_newline_offsets_capacity = numnewlines;5731}5732}57335734bool Parser::_locations_dirty() const5735{5736return !m_newline_offsets_size;5737}57385739} // namespace yml5740} // namespace c4574157425743#if defined(_MSC_VER)5744# pragma warning(pop)5745#elif defined(__clang__)5746# pragma clang diagnostic pop5747#elif defined(__GNUC__)5748# pragma GCC diagnostic pop5749#endif575057515752