// Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/stringtriebuilder.h
// 48773 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3*******************************************************************************4* Copyright (C) 2010-2012,2014, International Business Machines5* Corporation and others. All Rights Reserved.6*******************************************************************************7* file name: stringtriebuilder.h8* encoding: UTF-89* tab size: 8 (not used)10* indentation:411*12* created on: 2010dec2413* created by: Markus W. Scherer14*/1516#ifndef __STRINGTRIEBUILDER_H__17#define __STRINGTRIEBUILDER_H__1819#include "unicode/utypes.h"20#include "unicode/uobject.h"2122/**23* \file24* \brief C++ API: Builder API for trie builders25*/2627// Forward declaration.28/// \cond29struct UHashtable;30typedef struct UHashtable UHashtable;31/// \endcond3233/**34* Build options for BytesTrieBuilder and CharsTrieBuilder.35* @stable ICU 4.836*/37enum UStringTrieBuildOption {38/**39* Builds a trie quickly.40* @stable ICU 4.841*/42USTRINGTRIE_BUILD_FAST,43/**44* Builds a trie more slowly, attempting to generate45* a shorter but equivalent serialization.46* This build option also uses more memory.47*48* This option can be effective when many integer values are the same49* and string/byte sequence suffixes can be shared.50* Runtime speed is not expected to improve.51* @stable ICU 4.852*/53USTRINGTRIE_BUILD_SMALL54};5556U_NAMESPACE_BEGIN5758/**59* Base class for string trie builder classes.60*61* This class is not intended for public subclassing.62* @stable ICU 4.863*/64class U_COMMON_API StringTrieBuilder : public UObject {65public:66#ifndef U_HIDE_INTERNAL_API67/** @internal */68static int32_t hashNode(const void *node);69/** @internal */70static UBool equalNodes(const void *left, const void *right);71#endif /* U_HIDE_INTERNAL_API */7273protected:74// Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API75// or else the compiler will create a public 
default constructor.76/** @internal */77StringTrieBuilder();78/** @internal */79virtual ~StringTrieBuilder();8081#ifndef U_HIDE_INTERNAL_API82/** @internal */83void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);84/** @internal */85void deleteCompactBuilder();8687/** @internal */88void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);8990/** @internal */91int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);92/** @internal */93int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);94#endif /* U_HIDE_INTERNAL_API */9596class Node;9798#ifndef U_HIDE_INTERNAL_API99/** @internal */100Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);101/** @internal */102Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,103int32_t length, UErrorCode &errorCode);104#endif /* U_HIDE_INTERNAL_API */105106/** @internal */107virtual int32_t getElementStringLength(int32_t i) const = 0;108/** @internal */109virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;110/** @internal */111virtual int32_t getElementValue(int32_t i) const = 0;112113// Finds the first unit index after this one where114// the first and last element have different units again.115/** @internal */116virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;117118// Number of different units at unitIndex.119/** @internal */120virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;121/** @internal */122virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;123/** @internal */124virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;125126/** @internal */127virtual UBool matchNodesCanHaveValues() const = 0;128129/** @internal */130virtual int32_t getMaxBranchLinearSubNodeLength() const = 
0;131/** @internal */132virtual int32_t getMinLinearMatch() const = 0;133/** @internal */134virtual int32_t getMaxLinearMatchLength() const = 0;135136#ifndef U_HIDE_INTERNAL_API137// max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).138/** @internal */139static const int32_t kMaxBranchLinearSubNodeLength=5;140141// Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.142// log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.143/** @internal */144static const int32_t kMaxSplitBranchLevels=14;145146/**147* Makes sure that there is only one unique node registered that is148* equivalent to newNode.149* @param newNode Input node. The builder takes ownership.150* @param errorCode ICU in/out UErrorCode.151Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.152* @return newNode if it is the first of its kind, or153* an equivalent node if newNode is a duplicate.154* @internal155*/156Node *registerNode(Node *newNode, UErrorCode &errorCode);157/**158* Makes sure that there is only one unique FinalValueNode registered159* with this value.160* Avoids creating a node if the value is a duplicate.161* @param value A final value.162* @param errorCode ICU in/out UErrorCode.163Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.164* @return A FinalValueNode with the given value.165* @internal166*/167Node *registerFinalValue(int32_t value, UErrorCode &errorCode);168#endif /* U_HIDE_INTERNAL_API */169170/*171* C++ note:172* registerNode() and registerFinalValue() take ownership of their input nodes,173* and only return owned nodes.174* If they see a failure UErrorCode, they will delete the input node.175* If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.176* If there is a failure, they return NULL.177*178* NULL Node pointers can be safely passed into other Nodes because179* they call the static Node::hashCode() which checks for a NULL pointer 
first.180*181* Therefore, as long as builder functions register a new node,182* they need to check for failures only before explicitly dereferencing183* a Node pointer, or before setting a new UErrorCode.184*/185186// Hash set of nodes, maps from nodes to integer 1.187/** @internal */188UHashtable *nodes;189190// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,191// it is needed for layout of other objects.192/**193* @internal194* \cond195*/196class Node : public UObject {197public:198Node(int32_t initialHash) : hash(initialHash), offset(0) {}199inline int32_t hashCode() const { return hash; }200// Handles node==NULL.201static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }202// Base class operator==() compares the actual class types.203virtual UBool operator==(const Node &other) const;204inline UBool operator!=(const Node &other) const { return !operator==(other); }205/**206* Traverses the Node graph and numbers branch edges, with rightmost edges first.207* This is to avoid writing a duplicate node twice.208*209* Branch nodes in this trie data structure are not symmetric.210* Most branch edges "jump" to other nodes but the rightmost branch edges211* just continue without a jump.212* Therefore, write() must write the rightmost branch edge last213* (trie units are written backwards), and must write it at that point even if214* it is a duplicate of a node previously written elsewhere.215*216* This function visits and marks right branch edges first.217* Edges are numbered with increasingly negative values because we share the218* offset field which gets positive values when nodes are written.219* A branch edge also remembers the first number for any of its edges.220*221* When a further-left branch edge has a number in the range of the rightmost222* edge's numbers, then it will be written as part of the required right edge223* and we can avoid writing it first.224*225* After root.markRightEdgesFirst(-1) the 
offsets of all nodes are negative226* edge numbers.227*228* @param edgeNumber The first edge number for this node and its sub-nodes.229* @return An edge number that is at least the maximum-negative230* of the input edge number and the numbers of this node and all of its sub-nodes.231*/232virtual int32_t markRightEdgesFirst(int32_t edgeNumber);233// write() must set the offset to a positive value.234virtual void write(StringTrieBuilder &builder) = 0;235// See markRightEdgesFirst.236inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,237StringTrieBuilder &builder) {238// Note: Edge numbers are negative, lastRight<=firstRight.239// If offset>0 then this node and its sub-nodes have been written already240// and we need not write them again.241// If this node is part of the unwritten right branch edge,242// then we wait until that is written.243if(offset<0 && (offset<lastRight || firstRight<offset)) {244write(builder);245}246}247inline int32_t getOffset() const { return offset; }248protected:249int32_t hash;250int32_t offset;251};252253#ifndef U_HIDE_INTERNAL_API254// This class should not be overridden because255// registerFinalValue() compares a stack-allocated FinalValueNode256// (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)257// with the input node, and the258// !Node::operator==(other) used inside FinalValueNode::operator==(other)259// will be false if the typeid's are different.260/** @internal */261class FinalValueNode : public Node {262public:263FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}264virtual UBool operator==(const Node &other) const;265virtual void write(StringTrieBuilder &builder);266protected:267int32_t value;268};269#endif /* U_HIDE_INTERNAL_API */270271// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,272// it is needed for layout of other objects.273/**274* @internal275*/276class ValueNode : public Node {277public:278ValueNode(int32_t initialHash) : 
Node(initialHash), hasValue(FALSE), value(0) {}279virtual UBool operator==(const Node &other) const;280void setValue(int32_t v) {281hasValue=TRUE;282value=v;283hash=hash*37u+v;284}285protected:286UBool hasValue;287int32_t value;288};289290#ifndef U_HIDE_INTERNAL_API291/**292* @internal293*/294class IntermediateValueNode : public ValueNode {295public:296IntermediateValueNode(int32_t v, Node *nextNode)297: ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }298virtual UBool operator==(const Node &other) const;299virtual int32_t markRightEdgesFirst(int32_t edgeNumber);300virtual void write(StringTrieBuilder &builder);301protected:302Node *next;303};304#endif /* U_HIDE_INTERNAL_API */305306// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,307// it is needed for layout of other objects.308/**309* @internal310*/311class LinearMatchNode : public ValueNode {312public:313LinearMatchNode(int32_t len, Node *nextNode)314: ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),315length(len), next(nextNode) {}316virtual UBool operator==(const Node &other) const;317virtual int32_t markRightEdgesFirst(int32_t edgeNumber);318protected:319int32_t length;320Node *next;321};322323#ifndef U_HIDE_INTERNAL_API324/**325* @internal326*/327class BranchNode : public Node {328public:329BranchNode(int32_t initialHash) : Node(initialHash) {}330protected:331int32_t firstEdgeNumber;332};333334/**335* @internal336*/337class ListBranchNode : public BranchNode {338public:339ListBranchNode() : BranchNode(0x444444), length(0) {}340virtual UBool operator==(const Node &other) const;341virtual int32_t markRightEdgesFirst(int32_t edgeNumber);342virtual void write(StringTrieBuilder &builder);343// Adds a unit with a final value.344void add(int32_t c, int32_t value) {345units[length]=(char16_t)c;346equal[length]=NULL;347values[length]=value;348++length;349hash=(hash*37u+c)*37u+value;350}351// Adds a unit which leads to another match node.352void add(int32_t c, 
Node *node) {353units[length]=(char16_t)c;354equal[length]=node;355values[length]=0;356++length;357hash=(hash*37u+c)*37u+hashCode(node);358}359protected:360Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".361int32_t length;362int32_t values[kMaxBranchLinearSubNodeLength];363char16_t units[kMaxBranchLinearSubNodeLength];364};365366/**367* @internal368*/369class SplitBranchNode : public BranchNode {370public:371SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)372: BranchNode(((0x555555u*37u+middleUnit)*37u+373hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),374unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}375virtual UBool operator==(const Node &other) const;376virtual int32_t markRightEdgesFirst(int32_t edgeNumber);377virtual void write(StringTrieBuilder &builder);378protected:379char16_t unit;380Node *lessThan;381Node *greaterOrEqual;382};383384// Branch head node, for writing the actual node lead unit.385/** @internal */386class BranchHeadNode : public ValueNode {387public:388BranchHeadNode(int32_t len, Node *subNode)389: ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),390length(len), next(subNode) {}391virtual UBool operator==(const Node &other) const;392virtual int32_t markRightEdgesFirst(int32_t edgeNumber);393virtual void write(StringTrieBuilder &builder);394protected:395int32_t length;396Node *next; // A branch sub-node.397};398399#endif /* U_HIDE_INTERNAL_API */400/// \endcond401402/** @internal */403virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,404Node *nextNode) const = 0;405406/** @internal */407virtual int32_t write(int32_t unit) = 0;408/** @internal */409virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;410/** @internal */411virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;412/** @internal */413virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t 
node) = 0;414/** @internal */415virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;416};417418U_NAMESPACE_END419420#endif // __STRINGTRIEBUILDER_H__421422423