Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/stringtriebuilder.h
38827 views
1
/*
2
*******************************************************************************
3
* Copyright (C) 2010-2012,2014, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
*******************************************************************************
6
* file name: stringtriebuilder.h
7
* encoding: US-ASCII
8
* tab size: 8 (not used)
9
* indentation:4
10
*
11
* created on: 2010dec24
12
* created by: Markus W. Scherer
13
*/
14
15
#ifndef __STRINGTRIEBUILDER_H__
16
#define __STRINGTRIEBUILDER_H__
17
18
#include "unicode/utypes.h"
19
#include "unicode/uobject.h"
20
21
/**
22
* \file
23
* \brief C++ API: Builder API for trie builders
24
*/
25
26
// Forward declaration of the hash table type; only pointers to it are used
// in this header, so the full definition is not needed here.
struct UHashtable;
typedef struct UHashtable UHashtable;
29
30
/**
 * Build options for BytesTrieBuilder and CharsTrieBuilder.
 *
 * The enumerators carry no explicit initializers, so USTRINGTRIE_BUILD_FAST
 * is 0 and USTRINGTRIE_BUILD_SMALL is 1.
 * @stable ICU 4.8
 */
enum UStringTrieBuildOption {
    /**
     * Builds a trie quickly.
     * @stable ICU 4.8
     */
    USTRINGTRIE_BUILD_FAST,
    /**
     * Builds a trie more slowly, attempting to generate
     * a shorter but equivalent serialization.
     * This build option also uses more memory.
     *
     * This option can be effective when many integer values are the same
     * and string/byte sequence suffixes can be shared.
     * Runtime speed is not expected to improve.
     * @stable ICU 4.8
     */
    USTRINGTRIE_BUILD_SMALL
};
52
53
U_NAMESPACE_BEGIN
54
55
/**
56
* Base class for string trie builder classes.
57
*
58
* This class is not intended for public subclassing.
59
* @stable ICU 4.8
60
*/
61
class U_COMMON_API StringTrieBuilder : public UObject {
62
public:
63
#ifndef U_HIDE_INTERNAL_API
64
/** @internal */
65
static UBool hashNode(const void *node);
66
/** @internal */
67
static UBool equalNodes(const void *left, const void *right);
68
#endif /* U_HIDE_INTERNAL_API */
69
70
protected:
71
// Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
72
// or else the compiler will create a public default constructor.
73
/** @internal */
74
StringTrieBuilder();
75
/** @internal */
76
virtual ~StringTrieBuilder();
77
78
#ifndef U_HIDE_INTERNAL_API
79
/** @internal */
80
void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
81
/** @internal */
82
void deleteCompactBuilder();
83
84
/** @internal */
85
void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
86
87
/** @internal */
88
int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
89
/** @internal */
90
int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
91
#endif /* U_HIDE_INTERNAL_API */
92
93
class Node;
94
95
#ifndef U_HIDE_INTERNAL_API
96
/** @internal */
97
Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
98
/** @internal */
99
Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
100
int32_t length, UErrorCode &errorCode);
101
#endif /* U_HIDE_INTERNAL_API */
102
103
/** @internal */
104
virtual int32_t getElementStringLength(int32_t i) const = 0;
105
/** @internal */
106
virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;
107
/** @internal */
108
virtual int32_t getElementValue(int32_t i) const = 0;
109
110
// Finds the first unit index after this one where
111
// the first and last element have different units again.
112
/** @internal */
113
virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
114
115
// Number of different units at unitIndex.
116
/** @internal */
117
virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
118
/** @internal */
119
virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
120
/** @internal */
121
virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;
122
123
/** @internal */
124
virtual UBool matchNodesCanHaveValues() const = 0;
125
126
/** @internal */
127
virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
128
/** @internal */
129
virtual int32_t getMinLinearMatch() const = 0;
130
/** @internal */
131
virtual int32_t getMaxLinearMatchLength() const = 0;
132
133
#ifndef U_HIDE_INTERNAL_API
134
// max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
135
/** @internal */
136
static const int32_t kMaxBranchLinearSubNodeLength=5;
137
138
// Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units.
139
// log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
140
/** @internal */
141
static const int32_t kMaxSplitBranchLevels=14;
142
143
/**
144
* Makes sure that there is only one unique node registered that is
145
* equivalent to newNode.
146
* @param newNode Input node. The builder takes ownership.
147
* @param errorCode ICU in/out UErrorCode.
148
Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
149
* @return newNode if it is the first of its kind, or
150
* an equivalent node if newNode is a duplicate.
151
* @internal
152
*/
153
Node *registerNode(Node *newNode, UErrorCode &errorCode);
154
/**
155
* Makes sure that there is only one unique FinalValueNode registered
156
* with this value.
157
* Avoids creating a node if the value is a duplicate.
158
* @param value A final value.
159
* @param errorCode ICU in/out UErrorCode.
160
Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
161
* @return A FinalValueNode with the given value.
162
* @internal
163
*/
164
Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
165
#endif /* U_HIDE_INTERNAL_API */
166
167
/*
168
* C++ note:
169
* registerNode() and registerFinalValue() take ownership of their input nodes,
170
* and only return owned nodes.
171
* If they see a failure UErrorCode, they will delete the input node.
172
* If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
173
* If there is a failure, they return NULL.
174
*
175
* NULL Node pointers can be safely passed into other Nodes because
176
* they call the static Node::hashCode() which checks for a NULL pointer first.
177
*
178
* Therefore, as long as builder functions register a new node,
179
* they need to check for failures only before explicitly dereferencing
180
* a Node pointer, or before setting a new UErrorCode.
181
*/
182
183
// Hash set of nodes, maps from nodes to integer 1.
184
/** @internal */
185
UHashtable *nodes;
186
187
#ifndef U_HIDE_INTERNAL_API
188
/** @internal */
189
class Node : public UObject {
190
public:
191
Node(int32_t initialHash) : hash(initialHash), offset(0) {}
192
inline int32_t hashCode() const { return hash; }
193
// Handles node==NULL.
194
static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
195
// Base class operator==() compares the actual class types.
196
virtual UBool operator==(const Node &other) const;
197
inline UBool operator!=(const Node &other) const { return !operator==(other); }
198
/**
199
* Traverses the Node graph and numbers branch edges, with rightmost edges first.
200
* This is to avoid writing a duplicate node twice.
201
*
202
* Branch nodes in this trie data structure are not symmetric.
203
* Most branch edges "jump" to other nodes but the rightmost branch edges
204
* just continue without a jump.
205
* Therefore, write() must write the rightmost branch edge last
206
* (trie units are written backwards), and must write it at that point even if
207
* it is a duplicate of a node previously written elsewhere.
208
*
209
* This function visits and marks right branch edges first.
210
* Edges are numbered with increasingly negative values because we share the
211
* offset field which gets positive values when nodes are written.
212
* A branch edge also remembers the first number for any of its edges.
213
*
214
* When a further-left branch edge has a number in the range of the rightmost
215
* edge's numbers, then it will be written as part of the required right edge
216
* and we can avoid writing it first.
217
*
218
* After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
219
* edge numbers.
220
*
221
* @param edgeNumber The first edge number for this node and its sub-nodes.
222
* @return An edge number that is at least the maximum-negative
223
* of the input edge number and the numbers of this node and all of its sub-nodes.
224
*/
225
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
226
// write() must set the offset to a positive value.
227
virtual void write(StringTrieBuilder &builder) = 0;
228
// See markRightEdgesFirst.
229
inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
230
StringTrieBuilder &builder) {
231
// Note: Edge numbers are negative, lastRight<=firstRight.
232
// If offset>0 then this node and its sub-nodes have been written already
233
// and we need not write them again.
234
// If this node is part of the unwritten right branch edge,
235
// then we wait until that is written.
236
if(offset<0 && (offset<lastRight || firstRight<offset)) {
237
write(builder);
238
}
239
}
240
inline int32_t getOffset() const { return offset; }
241
protected:
242
int32_t hash;
243
int32_t offset;
244
};
245
246
// This class should not be overridden because
247
// registerFinalValue() compares a stack-allocated FinalValueNode
248
// (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
249
// with the input node, and the
250
// !Node::operator==(other) used inside FinalValueNode::operator==(other)
251
// will be false if the typeid's are different.
252
/** @internal */
253
class FinalValueNode : public Node {
254
public:
255
FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
256
virtual UBool operator==(const Node &other) const;
257
virtual void write(StringTrieBuilder &builder);
258
protected:
259
int32_t value;
260
};
261
262
/**
263
* @internal
264
*/
265
class ValueNode : public Node {
266
public:
267
ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
268
virtual UBool operator==(const Node &other) const;
269
void setValue(int32_t v) {
270
hasValue=TRUE;
271
value=v;
272
hash=hash*37+v;
273
}
274
protected:
275
UBool hasValue;
276
int32_t value;
277
};
278
279
/**
280
* @internal
281
*/
282
class IntermediateValueNode : public ValueNode {
283
public:
284
IntermediateValueNode(int32_t v, Node *nextNode)
285
: ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
286
virtual UBool operator==(const Node &other) const;
287
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
288
virtual void write(StringTrieBuilder &builder);
289
protected:
290
Node *next;
291
};
292
293
/**
294
* @internal
295
*/
296
class LinearMatchNode : public ValueNode {
297
public:
298
LinearMatchNode(int32_t len, Node *nextNode)
299
: ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
300
length(len), next(nextNode) {}
301
virtual UBool operator==(const Node &other) const;
302
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
303
protected:
304
int32_t length;
305
Node *next;
306
};
307
308
/**
309
* @internal
310
*/
311
class BranchNode : public Node {
312
public:
313
BranchNode(int32_t initialHash) : Node(initialHash) {}
314
protected:
315
int32_t firstEdgeNumber;
316
};
317
318
/**
319
* @internal
320
*/
321
class ListBranchNode : public BranchNode {
322
public:
323
ListBranchNode() : BranchNode(0x444444), length(0) {}
324
virtual UBool operator==(const Node &other) const;
325
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
326
virtual void write(StringTrieBuilder &builder);
327
// Adds a unit with a final value.
328
void add(int32_t c, int32_t value) {
329
units[length]=(UChar)c;
330
equal[length]=NULL;
331
values[length]=value;
332
++length;
333
hash=(hash*37+c)*37+value;
334
}
335
// Adds a unit which leads to another match node.
336
void add(int32_t c, Node *node) {
337
units[length]=(UChar)c;
338
equal[length]=node;
339
values[length]=0;
340
++length;
341
hash=(hash*37+c)*37+hashCode(node);
342
}
343
protected:
344
Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
345
int32_t length;
346
int32_t values[kMaxBranchLinearSubNodeLength];
347
UChar units[kMaxBranchLinearSubNodeLength];
348
};
349
350
/**
351
* @internal
352
*/
353
class SplitBranchNode : public BranchNode {
354
public:
355
SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
356
: BranchNode(((0x555555*37+middleUnit)*37+
357
hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
358
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
359
virtual UBool operator==(const Node &other) const;
360
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
361
virtual void write(StringTrieBuilder &builder);
362
protected:
363
UChar unit;
364
Node *lessThan;
365
Node *greaterOrEqual;
366
};
367
368
// Branch head node, for writing the actual node lead unit.
369
/** @internal */
370
class BranchHeadNode : public ValueNode {
371
public:
372
BranchHeadNode(int32_t len, Node *subNode)
373
: ValueNode((0x666666*37+len)*37+hashCode(subNode)),
374
length(len), next(subNode) {}
375
virtual UBool operator==(const Node &other) const;
376
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
377
virtual void write(StringTrieBuilder &builder);
378
protected:
379
int32_t length;
380
Node *next; // A branch sub-node.
381
};
382
#endif /* U_HIDE_INTERNAL_API */
383
384
/** @internal */
385
virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
386
Node *nextNode) const = 0;
387
388
/** @internal */
389
virtual int32_t write(int32_t unit) = 0;
390
/** @internal */
391
virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
392
/** @internal */
393
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
394
/** @internal */
395
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
396
/** @internal */
397
virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
398
};
399
400
U_NAMESPACE_END
401
402
#endif // __STRINGTRIEBUILDER_H__
403
404