Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-aarch32-jdk8u
Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/stringtriebuilder.h
48773 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2010-2012,2014, International Business Machines
6
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
8
* file name: stringtriebuilder.h
9
* encoding: UTF-8
10
* tab size: 8 (not used)
11
* indentation:4
12
*
13
* created on: 2010dec24
14
* created by: Markus W. Scherer
15
*/
16
17
#ifndef __STRINGTRIEBUILDER_H__
18
#define __STRINGTRIEBUILDER_H__
19
20
#include "unicode/utypes.h"
21
#include "unicode/uobject.h"
22
23
/**
24
* \file
25
* \brief C++ API: Builder API for trie builders
26
*/
27
28
// Forward declaration.
29
/// \cond
30
struct UHashtable;
31
typedef struct UHashtable UHashtable;
32
/// \endcond
33
34
/**
35
* Build options for BytesTrieBuilder and CharsTrieBuilder.
36
* @stable ICU 4.8
37
*/
38
enum UStringTrieBuildOption {
39
/**
40
* Builds a trie quickly.
41
* @stable ICU 4.8
42
*/
43
USTRINGTRIE_BUILD_FAST,
44
/**
45
* Builds a trie more slowly, attempting to generate
46
* a shorter but equivalent serialization.
47
* This build option also uses more memory.
48
*
49
* This option can be effective when many integer values are the same
50
* and string/byte sequence suffixes can be shared.
51
* Runtime speed is not expected to improve.
52
* @stable ICU 4.8
53
*/
54
USTRINGTRIE_BUILD_SMALL
55
};
56
57
U_NAMESPACE_BEGIN
58
59
/**
60
* Base class for string trie builder classes.
61
*
62
* This class is not intended for public subclassing.
63
* @stable ICU 4.8
64
*/
65
class U_COMMON_API StringTrieBuilder : public UObject {
66
public:
67
#ifndef U_HIDE_INTERNAL_API
68
/** @internal */
69
static int32_t hashNode(const void *node);
70
/** @internal */
71
static UBool equalNodes(const void *left, const void *right);
72
#endif /* U_HIDE_INTERNAL_API */
73
74
protected:
75
// Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
76
// or else the compiler will create a public default constructor.
77
/** @internal */
78
StringTrieBuilder();
79
/** @internal */
80
virtual ~StringTrieBuilder();
81
82
#ifndef U_HIDE_INTERNAL_API
83
/** @internal */
84
void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
85
/** @internal */
86
void deleteCompactBuilder();
87
88
/** @internal */
89
void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
90
91
/** @internal */
92
int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
93
/** @internal */
94
int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
95
#endif /* U_HIDE_INTERNAL_API */
96
97
class Node;
98
99
#ifndef U_HIDE_INTERNAL_API
100
/** @internal */
101
Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
102
/** @internal */
103
Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
104
int32_t length, UErrorCode &errorCode);
105
#endif /* U_HIDE_INTERNAL_API */
106
107
/** @internal */
108
virtual int32_t getElementStringLength(int32_t i) const = 0;
109
/** @internal */
110
virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
111
/** @internal */
112
virtual int32_t getElementValue(int32_t i) const = 0;
113
114
// Finds the first unit index after this one where
115
// the first and last element have different units again.
116
/** @internal */
117
virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
118
119
// Number of different units at unitIndex.
120
/** @internal */
121
virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
122
/** @internal */
123
virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
124
/** @internal */
125
virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
126
127
/** @internal */
128
virtual UBool matchNodesCanHaveValues() const = 0;
129
130
/** @internal */
131
virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
132
/** @internal */
133
virtual int32_t getMinLinearMatch() const = 0;
134
/** @internal */
135
virtual int32_t getMaxLinearMatchLength() const = 0;
136
137
#ifndef U_HIDE_INTERNAL_API
138
// max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
139
/** @internal */
140
static const int32_t kMaxBranchLinearSubNodeLength=5;
141
142
// Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
143
// log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
144
/** @internal */
145
static const int32_t kMaxSplitBranchLevels=14;
146
147
/**
148
* Makes sure that there is only one unique node registered that is
149
* equivalent to newNode.
150
* @param newNode Input node. The builder takes ownership.
151
* @param errorCode ICU in/out UErrorCode.
152
Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
153
* @return newNode if it is the first of its kind, or
154
* an equivalent node if newNode is a duplicate.
155
* @internal
156
*/
157
Node *registerNode(Node *newNode, UErrorCode &errorCode);
158
/**
159
* Makes sure that there is only one unique FinalValueNode registered
160
* with this value.
161
* Avoids creating a node if the value is a duplicate.
162
* @param value A final value.
163
* @param errorCode ICU in/out UErrorCode.
164
Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
165
* @return A FinalValueNode with the given value.
166
* @internal
167
*/
168
Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
169
#endif /* U_HIDE_INTERNAL_API */
170
171
/*
172
* C++ note:
173
* registerNode() and registerFinalValue() take ownership of their input nodes,
174
* and only return owned nodes.
175
* If they see a failure UErrorCode, they will delete the input node.
176
* If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
177
* If there is a failure, they return NULL.
178
*
179
* NULL Node pointers can be safely passed into other Nodes because
180
* they call the static Node::hashCode() which checks for a NULL pointer first.
181
*
182
* Therefore, as long as builder functions register a new node,
183
* they need to check for failures only before explicitly dereferencing
184
* a Node pointer, or before setting a new UErrorCode.
185
*/
186
187
// Hash set of nodes, maps from nodes to integer 1.
188
/** @internal */
189
UHashtable *nodes;
190
191
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
192
// it is needed for layout of other objects.
193
/**
194
* @internal
195
* \cond
196
*/
197
class Node : public UObject {
198
public:
199
Node(int32_t initialHash) : hash(initialHash), offset(0) {}
200
inline int32_t hashCode() const { return hash; }
201
// Handles node==NULL.
202
static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
203
// Base class operator==() compares the actual class types.
204
virtual UBool operator==(const Node &other) const;
205
inline UBool operator!=(const Node &other) const { return !operator==(other); }
206
/**
207
* Traverses the Node graph and numbers branch edges, with rightmost edges first.
208
* This is to avoid writing a duplicate node twice.
209
*
210
* Branch nodes in this trie data structure are not symmetric.
211
* Most branch edges "jump" to other nodes but the rightmost branch edges
212
* just continue without a jump.
213
* Therefore, write() must write the rightmost branch edge last
214
* (trie units are written backwards), and must write it at that point even if
215
* it is a duplicate of a node previously written elsewhere.
216
*
217
* This function visits and marks right branch edges first.
218
* Edges are numbered with increasingly negative values because we share the
219
* offset field which gets positive values when nodes are written.
220
* A branch edge also remembers the first number for any of its edges.
221
*
222
* When a further-left branch edge has a number in the range of the rightmost
223
* edge's numbers, then it will be written as part of the required right edge
224
* and we can avoid writing it first.
225
*
226
* After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
227
* edge numbers.
228
*
229
* @param edgeNumber The first edge number for this node and its sub-nodes.
230
* @return An edge number that is at least the maximum-negative
231
* of the input edge number and the numbers of this node and all of its sub-nodes.
232
*/
233
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
234
// write() must set the offset to a positive value.
235
virtual void write(StringTrieBuilder &builder) = 0;
236
// See markRightEdgesFirst.
237
inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
238
StringTrieBuilder &builder) {
239
// Note: Edge numbers are negative, lastRight<=firstRight.
240
// If offset>0 then this node and its sub-nodes have been written already
241
// and we need not write them again.
242
// If this node is part of the unwritten right branch edge,
243
// then we wait until that is written.
244
if(offset<0 && (offset<lastRight || firstRight<offset)) {
245
write(builder);
246
}
247
}
248
inline int32_t getOffset() const { return offset; }
249
protected:
250
int32_t hash;
251
int32_t offset;
252
};
253
254
#ifndef U_HIDE_INTERNAL_API
255
// This class should not be overridden because
256
// registerFinalValue() compares a stack-allocated FinalValueNode
257
// (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
258
// with the input node, and the
259
// !Node::operator==(other) used inside FinalValueNode::operator==(other)
260
// will be false if the typeid's are different.
261
/** @internal */
262
class FinalValueNode : public Node {
263
public:
264
FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
265
virtual UBool operator==(const Node &other) const;
266
virtual void write(StringTrieBuilder &builder);
267
protected:
268
int32_t value;
269
};
270
#endif /* U_HIDE_INTERNAL_API */
271
272
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
273
// it is needed for layout of other objects.
274
/**
275
* @internal
276
*/
277
class ValueNode : public Node {
278
public:
279
ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
280
virtual UBool operator==(const Node &other) const;
281
void setValue(int32_t v) {
282
hasValue=TRUE;
283
value=v;
284
hash=hash*37u+v;
285
}
286
protected:
287
UBool hasValue;
288
int32_t value;
289
};
290
291
#ifndef U_HIDE_INTERNAL_API
292
/**
293
* @internal
294
*/
295
class IntermediateValueNode : public ValueNode {
296
public:
297
IntermediateValueNode(int32_t v, Node *nextNode)
298
: ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
299
virtual UBool operator==(const Node &other) const;
300
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
301
virtual void write(StringTrieBuilder &builder);
302
protected:
303
Node *next;
304
};
305
#endif /* U_HIDE_INTERNAL_API */
306
307
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
308
// it is needed for layout of other objects.
309
/**
310
* @internal
311
*/
312
class LinearMatchNode : public ValueNode {
313
public:
314
LinearMatchNode(int32_t len, Node *nextNode)
315
: ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
316
length(len), next(nextNode) {}
317
virtual UBool operator==(const Node &other) const;
318
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
319
protected:
320
int32_t length;
321
Node *next;
322
};
323
324
#ifndef U_HIDE_INTERNAL_API
325
/**
326
* @internal
327
*/
328
class BranchNode : public Node {
329
public:
330
BranchNode(int32_t initialHash) : Node(initialHash) {}
331
protected:
332
int32_t firstEdgeNumber;
333
};
334
335
/**
336
* @internal
337
*/
338
class ListBranchNode : public BranchNode {
339
public:
340
ListBranchNode() : BranchNode(0x444444), length(0) {}
341
virtual UBool operator==(const Node &other) const;
342
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
343
virtual void write(StringTrieBuilder &builder);
344
// Adds a unit with a final value.
345
void add(int32_t c, int32_t value) {
346
units[length]=(char16_t)c;
347
equal[length]=NULL;
348
values[length]=value;
349
++length;
350
hash=(hash*37u+c)*37u+value;
351
}
352
// Adds a unit which leads to another match node.
353
void add(int32_t c, Node *node) {
354
units[length]=(char16_t)c;
355
equal[length]=node;
356
values[length]=0;
357
++length;
358
hash=(hash*37u+c)*37u+hashCode(node);
359
}
360
protected:
361
Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
362
int32_t length;
363
int32_t values[kMaxBranchLinearSubNodeLength];
364
char16_t units[kMaxBranchLinearSubNodeLength];
365
};
366
367
/**
368
* @internal
369
*/
370
class SplitBranchNode : public BranchNode {
371
public:
372
SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
373
: BranchNode(((0x555555u*37u+middleUnit)*37u+
374
hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
375
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
376
virtual UBool operator==(const Node &other) const;
377
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
378
virtual void write(StringTrieBuilder &builder);
379
protected:
380
char16_t unit;
381
Node *lessThan;
382
Node *greaterOrEqual;
383
};
384
385
// Branch head node, for writing the actual node lead unit.
386
/** @internal */
387
class BranchHeadNode : public ValueNode {
388
public:
389
BranchHeadNode(int32_t len, Node *subNode)
390
: ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
391
length(len), next(subNode) {}
392
virtual UBool operator==(const Node &other) const;
393
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
394
virtual void write(StringTrieBuilder &builder);
395
protected:
396
int32_t length;
397
Node *next; // A branch sub-node.
398
};
399
400
#endif /* U_HIDE_INTERNAL_API */
401
/// \endcond
402
403
/** @internal */
404
virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
405
Node *nextNode) const = 0;
406
407
/** @internal */
408
virtual int32_t write(int32_t unit) = 0;
409
/** @internal */
410
virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
411
/** @internal */
412
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
413
/** @internal */
414
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
415
/** @internal */
416
virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
417
};
418
419
U_NAMESPACE_END
420
421
#endif // __STRINGTRIEBUILDER_H__
422
423