Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/stringtriebuilder.h
38827 views
/*1*******************************************************************************2* Copyright (C) 2010-2012,2014, International Business Machines3* Corporation and others. All Rights Reserved.4*******************************************************************************5* file name: stringtriebuilder.h6* encoding: US-ASCII7* tab size: 8 (not used)8* indentation:49*10* created on: 2010dec2411* created by: Markus W. Scherer12*/1314#ifndef __STRINGTRIEBUILDER_H__15#define __STRINGTRIEBUILDER_H__1617#include "unicode/utypes.h"18#include "unicode/uobject.h"1920/**21* \file22* \brief C++ API: Builder API for trie builders23*/2425// Forward declaration.26struct UHashtable;27typedef struct UHashtable UHashtable;2829/**30* Build options for BytesTrieBuilder and CharsTrieBuilder.31* @stable ICU 4.832*/33enum UStringTrieBuildOption {34/**35* Builds a trie quickly.36* @stable ICU 4.837*/38USTRINGTRIE_BUILD_FAST,39/**40* Builds a trie more slowly, attempting to generate41* a shorter but equivalent serialization.42* This build option also uses more memory.43*44* This option can be effective when many integer values are the same45* and string/byte sequence suffixes can be shared.46* Runtime speed is not expected to improve.47* @stable ICU 4.848*/49USTRINGTRIE_BUILD_SMALL50};5152U_NAMESPACE_BEGIN5354/**55* Base class for string trie builder classes.56*57* This class is not intended for public subclassing.58* @stable ICU 4.859*/60class U_COMMON_API StringTrieBuilder : public UObject {61public:62#ifndef U_HIDE_INTERNAL_API63/** @internal */64static UBool hashNode(const void *node);65/** @internal */66static UBool equalNodes(const void *left, const void *right);67#endif /* U_HIDE_INTERNAL_API */6869protected:70// Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API71// or else the compiler will create a public default constructor.72/** @internal */73StringTrieBuilder();74/** @internal */75virtual ~StringTrieBuilder();7677#ifndef U_HIDE_INTERNAL_API78/** @internal */79void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);80/** @internal */81void deleteCompactBuilder();8283/** @internal */84void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);8586/** @internal */87int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);88/** @internal */89int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);90#endif /* U_HIDE_INTERNAL_API */9192class Node;9394#ifndef U_HIDE_INTERNAL_API95/** @internal */96Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);97/** @internal */98Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,99int32_t length, UErrorCode &errorCode);100#endif /* U_HIDE_INTERNAL_API */101102/** @internal */103virtual int32_t getElementStringLength(int32_t i) const = 0;104/** @internal */105virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;106/** @internal */107virtual int32_t getElementValue(int32_t i) const = 0;108109// Finds the first unit index after this one where110// the first and last element have different units again.111/** @internal */112virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;113114// Number of different units at unitIndex.115/** @internal */116virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;117/** @internal */118virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;119/** @internal */120virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;121122/** @internal */123virtual UBool matchNodesCanHaveValues() const = 0;124125/** @internal */126virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;127/** @internal */128virtual int32_t getMinLinearMatch() const = 0;129/** @internal */130virtual int32_t getMaxLinearMatchLength() const = 0;131132#ifndef U_HIDE_INTERNAL_API133// max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).134/** @internal */135static const int32_t kMaxBranchLinearSubNodeLength=5;136137// Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units.138// log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.139/** @internal */140static const int32_t kMaxSplitBranchLevels=14;141142/**143* Makes sure that there is only one unique node registered that is144* equivalent to newNode.145* @param newNode Input node. The builder takes ownership.146* @param errorCode ICU in/out UErrorCode.147Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.148* @return newNode if it is the first of its kind, or149* an equivalent node if newNode is a duplicate.150* @internal151*/152Node *registerNode(Node *newNode, UErrorCode &errorCode);153/**154* Makes sure that there is only one unique FinalValueNode registered155* with this value.156* Avoids creating a node if the value is a duplicate.157* @param value A final value.158* @param errorCode ICU in/out UErrorCode.159Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.160* @return A FinalValueNode with the given value.161* @internal162*/163Node *registerFinalValue(int32_t value, UErrorCode &errorCode);164#endif /* U_HIDE_INTERNAL_API */165166/*167* C++ note:168* registerNode() and registerFinalValue() take ownership of their input nodes,169* and only return owned nodes.170* If they see a failure UErrorCode, they will delete the input node.171* If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.172* If there is a failure, they return NULL.173*174* NULL Node pointers can be safely passed into other Nodes because175* they call the static Node::hashCode() which checks for a NULL pointer first.176*177* Therefore, as long as builder functions register a new node,178* they need to check for failures only before explicitly dereferencing179* a Node pointer, or before setting a new UErrorCode.180*/181182// Hash set of nodes, maps from nodes to integer 1.183/** @internal */184UHashtable *nodes;185186#ifndef U_HIDE_INTERNAL_API187/** @internal */188class Node : public UObject {189public:190Node(int32_t initialHash) : hash(initialHash), offset(0) {}191inline int32_t hashCode() const { return hash; }192// Handles node==NULL.193static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }194// Base class operator==() compares the actual class types.195virtual UBool operator==(const Node &other) const;196inline UBool operator!=(const Node &other) const { return !operator==(other); }197/**198* Traverses the Node graph and numbers branch edges, with rightmost edges first.199* This is to avoid writing a duplicate node twice.200*201* Branch nodes in this trie data structure are not symmetric.202* Most branch edges "jump" to other nodes but the rightmost branch edges203* just continue without a jump.204* Therefore, write() must write the rightmost branch edge last205* (trie units are written backwards), and must write it at that point even if206* it is a duplicate of a node previously written elsewhere.207*208* This function visits and marks right branch edges first.209* Edges are numbered with increasingly negative values because we share the210* offset field which gets positive values when nodes are written.211* A branch edge also remembers the first number for any of its edges.212*213* When a further-left branch edge has a number in the range of the rightmost214* edge's numbers, then it will be written as part of the required right edge215* and we can avoid writing it first.216*217* After root.markRightEdgesFirst(-1) the offsets of all nodes are negative218* edge numbers.219*220* @param edgeNumber The first edge number for this node and its sub-nodes.221* @return An edge number that is at least the maximum-negative222* of the input edge number and the numbers of this node and all of its sub-nodes.223*/224virtual int32_t markRightEdgesFirst(int32_t edgeNumber);225// write() must set the offset to a positive value.226virtual void write(StringTrieBuilder &builder) = 0;227// See markRightEdgesFirst.228inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,229StringTrieBuilder &builder) {230// Note: Edge numbers are negative, lastRight<=firstRight.231// If offset>0 then this node and its sub-nodes have been written already232// and we need not write them again.233// If this node is part of the unwritten right branch edge,234// then we wait until that is written.235if(offset<0 && (offset<lastRight || firstRight<offset)) {236write(builder);237}238}239inline int32_t getOffset() const { return offset; }240protected:241int32_t hash;242int32_t offset;243};244245// This class should not be overridden because246// registerFinalValue() compares a stack-allocated FinalValueNode247// (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)248// with the input node, and the249// !Node::operator==(other) used inside FinalValueNode::operator==(other)250// will be false if the typeid's are different.251/** @internal */252class FinalValueNode : public Node {253public:254FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}255virtual UBool operator==(const Node &other) const;256virtual void write(StringTrieBuilder &builder);257protected:258int32_t value;259};260261/**262* @internal263*/264class ValueNode : public Node {265public:266ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}267virtual UBool operator==(const Node &other) const;268void setValue(int32_t v) {269hasValue=TRUE;270value=v;271hash=hash*37+v;272}273protected:274UBool hasValue;275int32_t value;276};277278/**279* @internal280*/281class IntermediateValueNode : public ValueNode {282public:283IntermediateValueNode(int32_t v, Node *nextNode)284: ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }285virtual UBool operator==(const Node &other) const;286virtual int32_t markRightEdgesFirst(int32_t edgeNumber);287virtual void write(StringTrieBuilder &builder);288protected:289Node *next;290};291292/**293* @internal294*/295class LinearMatchNode : public ValueNode {296public:297LinearMatchNode(int32_t len, Node *nextNode)298: ValueNode((0x333333*37+len)*37+hashCode(nextNode)),299length(len), next(nextNode) {}300virtual UBool operator==(const Node &other) const;301virtual int32_t markRightEdgesFirst(int32_t edgeNumber);302protected:303int32_t length;304Node *next;305};306307/**308* @internal309*/310class BranchNode : public Node {311public:312BranchNode(int32_t initialHash) : Node(initialHash) {}313protected:314int32_t firstEdgeNumber;315};316317/**318* @internal319*/320class ListBranchNode : public BranchNode {321public:322ListBranchNode() : BranchNode(0x444444), length(0) {}323virtual UBool operator==(const Node &other) const;324virtual int32_t markRightEdgesFirst(int32_t edgeNumber);325virtual void write(StringTrieBuilder &builder);326// Adds a unit with a final value.327void add(int32_t c, int32_t value) {328units[length]=(UChar)c;329equal[length]=NULL;330values[length]=value;331++length;332hash=(hash*37+c)*37+value;333}334// Adds a unit which leads to another match node.335void add(int32_t c, Node *node) {336units[length]=(UChar)c;337equal[length]=node;338values[length]=0;339++length;340hash=(hash*37+c)*37+hashCode(node);341}342protected:343Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".344int32_t length;345int32_t values[kMaxBranchLinearSubNodeLength];346UChar units[kMaxBranchLinearSubNodeLength];347};348349/**350* @internal351*/352class SplitBranchNode : public BranchNode {353public:354SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)355: BranchNode(((0x555555*37+middleUnit)*37+356hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),357unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}358virtual UBool operator==(const Node &other) const;359virtual int32_t markRightEdgesFirst(int32_t edgeNumber);360virtual void write(StringTrieBuilder &builder);361protected:362UChar unit;363Node *lessThan;364Node *greaterOrEqual;365};366367// Branch head node, for writing the actual node lead unit.368/** @internal */369class BranchHeadNode : public ValueNode {370public:371BranchHeadNode(int32_t len, Node *subNode)372: ValueNode((0x666666*37+len)*37+hashCode(subNode)),373length(len), next(subNode) {}374virtual UBool operator==(const Node &other) const;375virtual int32_t markRightEdgesFirst(int32_t edgeNumber);376virtual void write(StringTrieBuilder &builder);377protected:378int32_t length;379Node *next; // A branch sub-node.380};381#endif /* U_HIDE_INTERNAL_API */382383/** @internal */384virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,385Node *nextNode) const = 0;386387/** @internal */388virtual int32_t write(int32_t unit) = 0;389/** @internal */390virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;391/** @internal */392virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;393/** @internal */394virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;395/** @internal */396virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;397};398399U_NAMESPACE_END400401#endif // __STRINGTRIEBUILDER_H__402403404