Path: blob/master/thirdparty/icu4c/common/bytestrieiterator.cpp
9903 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3*******************************************************************************4* Copyright (C) 2010-2012, International Business Machines5* Corporation and others. All Rights Reserved.6*******************************************************************************7* file name: bytestrieiterator.cpp8* encoding: UTF-89* tab size: 8 (not used)10* indentation:411*12* created on: 2010nov0313* created by: Markus W. Scherer14*/1516#include "unicode/utypes.h"17#include "unicode/bytestrie.h"18#include "unicode/stringpiece.h"19#include "charstr.h"20#include "uvectr32.h"2122U_NAMESPACE_BEGIN2324BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,25UErrorCode &errorCode)26: bytes_(static_cast<const uint8_t *>(trieBytes)),27pos_(bytes_), initialPos_(bytes_),28remainingMatchLength_(-1), initialRemainingMatchLength_(-1),29str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {30if(U_FAILURE(errorCode)) {31return;32}33// str_ and stack_ are pointers so that it's easy to turn bytestrie.h into34// a public API header for which we would want it to depend only on35// other public headers.36// Unlike BytesTrie itself, its Iterator performs memory allocations anyway37// via the CharString and UVector32 implementations, so this additional38// cost is minimal.39str_=new CharString();40stack_=new UVector32(errorCode);41if(U_SUCCESS(errorCode) && (str_==nullptr || stack_==nullptr)) {42errorCode=U_MEMORY_ALLOCATION_ERROR;43}44}4546BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,47UErrorCode &errorCode)48: bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),49remainingMatchLength_(trie.remainingMatchLength_),50initialRemainingMatchLength_(trie.remainingMatchLength_),51str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {52if(U_FAILURE(errorCode)) {53return;54}55str_=new CharString();56stack_=new UVector32(errorCode);57if(U_FAILURE(errorCode)) {58return;59}60if(str_==nullptr || stack_==nullptr) {61errorCode=U_MEMORY_ALLOCATION_ERROR;62return;63}64int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.65if(length>=0) {66// Pending linear-match node, append remaining bytes to str_.67++length;68if(maxLength_>0 && length>maxLength_) {69length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.70}71str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);72pos_+=length;73remainingMatchLength_-=length;74}75}7677BytesTrie::Iterator::~Iterator() {78delete str_;79delete stack_;80}8182BytesTrie::Iterator &83BytesTrie::Iterator::reset() {84pos_=initialPos_;85remainingMatchLength_=initialRemainingMatchLength_;86int32_t length=remainingMatchLength_+1; // Remaining match length.87if(maxLength_>0 && length>maxLength_) {88length=maxLength_;89}90str_->truncate(length);91pos_+=length;92remainingMatchLength_-=length;93stack_->setSize(0);94return *this;95}9697UBool98BytesTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }99100UBool101BytesTrie::Iterator::next(UErrorCode &errorCode) {102if(U_FAILURE(errorCode)) {103return false;104}105const uint8_t *pos=pos_;106if(pos==nullptr) {107if(stack_->isEmpty()) {108return false;109}110// Pop the state off the stack and continue with the next outbound edge of111// the branch node.112int32_t stackSize=stack_->size();113int32_t length=stack_->elementAti(stackSize-1);114pos=bytes_+stack_->elementAti(stackSize-2);115stack_->setSize(stackSize-2);116str_->truncate(length&0xffff);117length = static_cast<int32_t>(static_cast<uint32_t>(length) >> 16);118if(length>1) {119pos=branchNext(pos, length, errorCode);120if(pos==nullptr) {121return true; // Reached a final value.122}123} else {124str_->append(static_cast<char>(*pos++), errorCode);125}126}127if(remainingMatchLength_>=0) {128// We only get here if we started in a pending linear-match node129// with more than maxLength remaining bytes.130return truncateAndStop();131}132for(;;) {133int32_t node=*pos++;134if(node>=kMinValueLead) {135// Deliver value for the byte sequence so far.136UBool isFinal = static_cast<UBool>(node & kValueIsFinal);137value_=readValue(pos, node>>1);138if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {139pos_=nullptr;140} else {141pos_=skipValue(pos, node);142}143return true;144}145if(maxLength_>0 && str_->length()==maxLength_) {146return truncateAndStop();147}148if(node<kMinLinearMatch) {149if(node==0) {150node=*pos++;151}152pos=branchNext(pos, node+1, errorCode);153if(pos==nullptr) {154return true; // Reached a final value.155}156} else {157// Linear-match node, append length bytes to str_.158int32_t length=node-kMinLinearMatch+1;159if(maxLength_>0 && str_->length()+length>maxLength_) {160str_->append(reinterpret_cast<const char *>(pos),161maxLength_-str_->length(), errorCode);162return truncateAndStop();163}164str_->append(reinterpret_cast<const char *>(pos), length, errorCode);165pos+=length;166}167}168}169170StringPiece171BytesTrie::Iterator::getString() const {172return str_ == nullptr ? StringPiece() : str_->toStringPiece();173}174175UBool176BytesTrie::Iterator::truncateAndStop() {177pos_=nullptr;178value_=-1; // no real value for str179return true;180}181182// Branch node, needs to take the first outbound edge and push state for the rest.183const uint8_t *184BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {185while(length>kMaxBranchLinearSubNodeLength) {186++pos; // ignore the comparison byte187// Push state for the greater-or-equal edge.188stack_->addElement(static_cast<int32_t>(skipDelta(pos) - bytes_), errorCode);189stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);190// Follow the less-than edge.191length>>=1;192pos=jumpByDelta(pos);193}194// List of key-value pairs where values are either final values or jump deltas.195// Read the first (key, value) pair.196uint8_t trieByte=*pos++;197int32_t node=*pos++;198UBool isFinal = static_cast<UBool>(node & kValueIsFinal);199int32_t value=readValue(pos, node>>1);200pos=skipValue(pos, node);201stack_->addElement(static_cast<int32_t>(pos - bytes_), errorCode);202stack_->addElement(((length-1)<<16)|str_->length(), errorCode);203str_->append(static_cast<char>(trieByte), errorCode);204if(isFinal) {205pos_=nullptr;206value_=value;207return nullptr;208} else {209return pos+value;210}211}212213U_NAMESPACE_END214215216