Path: blob/master/thirdparty/embree/common/lexers/tokenstream.cpp
9912 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#include "tokenstream.h"4#include "../math/emath.h"56namespace embree7{8/* shorthands for common sets of characters */9const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";10const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";11const std::string TokenStream::numbers = "0123456789";12const std::string TokenStream::separators = "\n\t\r ";13const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";1415/* creates map for fast categorization of characters */16static void createCharMap(bool map[256], const std::string& chrs) {17for (size_t i=0; i<256; i++) map[i] = false;18for (size_t i=0; i<chrs.size(); i++) map[uint8_t(chrs[i])] = true;19}2021/* build full tokenizer that takes list of valid characters and keywords */22TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from23const std::string& alpha, //< valid characters for identifiers24const std::string& seps, //< characters that act as separators25const std::vector<std::string>& symbols) //< symbols26: cin(cin), symbols(symbols)27{28createCharMap(isAlphaMap,alpha);29createCharMap(isSepMap,seps);30createCharMap(isStringCharMap,stringChars);31}3233bool TokenStream::decDigits(std::string& str_o)34{35bool ok = false;36std::string str;37if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get();38while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }39if (ok) str_o += str;40else cin->unget(str.size());41return ok;42}4344bool TokenStream::decDigits1(std::string& str_o)45{46bool ok = false;47std::string str;48while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }49if (ok) str_o += str; else cin->unget(str.size());50return ok;51}5253bool TokenStream::trySymbol(const std::string& symbol)54{55size_t pos = 0;56while (pos < symbol.size()) {57if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; }58cin->drop(); pos++;59}60return true;61}6263bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)64{65for (size_t i=0; i<symbols.size(); i++) {66if (!trySymbol(symbols[i])) continue;67token = Token(symbols[i],Token::TY_SYMBOL,loc);68return true;69}70return false;71}7273bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)74{75bool ok = false;76std::string str;77if (trySymbol("nan")) {78token = Token(float(nan));79return true;80}81if (trySymbol("+inf")) {82token = Token(float(pos_inf));83return true;84}85if (trySymbol("-inf")) {86token = Token(float(neg_inf));87return true;88}8990if (decDigits(str))91{92if (cin->peek() == '.') {93str += (char)cin->get();94decDigits(str);95if (cin->peek() == 'e' || cin->peek() == 'E') {96str += (char)cin->get();97if (decDigits(str)) ok = true; // 1.[2]E298}99else ok = true; // 1.[2]100}101else if (cin->peek() == 'e' || cin->peek() == 'E') {102str += (char)cin->get();103if (decDigits(str)) ok = true; // 1E2104}105}106else107{108if (cin->peek() == '.') {109str += (char)cin->get();110if (decDigits(str)) {111if (cin->peek() == 'e' || cin->peek() == 'E') {112str += (char)cin->get();113if (decDigits(str)) ok = true; // .3E2114}115else ok = true; // .3116}117}118}119if (ok) {120token = Token((float)atof(str.c_str()),loc);121}122else cin->unget(str.size());123return ok;124}125126bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {127std::string str;128if (decDigits(str)) {129token = Token(atoi(str.c_str()),loc);130return true;131}132return false;133}134135bool TokenStream::tryString(Token& token, const ParseLocation& loc)136{137std::string str;138if (cin->peek() != '\"') return false;139cin->drop();140while (cin->peek() != '\"') {141const int c = cin->get();142if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());143str += (char)c;144}145cin->drop();146token = Token(str,Token::TY_STRING,loc);147return true;148}149150bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)151{152std::string str;153if (!isAlpha(cin->peek())) return false;154str += (char)cin->get();155while (isAlphaNum(cin->peek())) str += (char)cin->get();156token = Token(str,Token::TY_IDENTIFIER,loc);157return true;158}159160void TokenStream::skipSeparators()161{162/* skip separators */163while (cin->peek() != EOF && isSeparator(cin->peek()))164cin->drop();165}166167Token TokenStream::next()168{169Token token;170skipSeparators();171ParseLocation loc = cin->loc();172if (trySymbols (token,loc)) return token; /**< try to parse a symbol */173if (tryFloat (token,loc)) return token; /**< try to parse float */174if (tryInt (token,loc)) return token; /**< try to parse integer */175if (tryString (token,loc)) return token; /**< try to parse string */176if (tryIdentifier(token,loc)) return token; /**< try to parse identifier */177if (cin->peek() == EOF ) return Token(loc); /**< return EOF token */178return Token((char)cin->get(),loc); /**< return invalid character token */179}180}181182183