// Source: GitHub repository PojavLauncherTeam/openjdk-aarch32-jdk8u
// Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/caniter.h
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

#ifndef CANITER_H
#define CANITER_H

// Included ahead of the UCONFIG_NO_NORMALIZATION test below, which presumably
// relies on configuration macros made visible through this header.
#include "unicode/utypes.h"

// Everything up to the matching #endif near the end of this header is
// compiled out when normalization support is disabled.
#if !UCONFIG_NO_NORMALIZATION

#include "unicode/uobject.h"
#include "unicode/unistr.h"

/**
 * \file
 * \brief C++ API: Canonical Iterator
 */

/**
 * Should permutation skip characters with combining class zero?
 * Should be either TRUE or FALSE. This is a compile-time option: the default
 * below is only applied when CANITER_SKIP_ZEROES has not already been
 * defined, so it can be overridden by defining the macro before this point.
 * @stable ICU 2.4
 */
#ifndef CANITER_SKIP_ZEROES
#define CANITER_SKIP_ZEROES TRUE
#endif

U_NAMESPACE_BEGIN

// Forward declarations: this header only uses these types through pointers
// and references, so their full definitions are not required here.
class Hashtable;
class Normalizer2;
class Normalizer2Impl;

/**
40
* This class allows one to iterate through all the strings that are canonically equivalent to a given
41
* string. For example, here are some sample results:
42
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
43
1: \\u0041\\u030A\\u0064\\u0307\\u0327
44
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
45
2: \\u0041\\u030A\\u0064\\u0327\\u0307
46
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
47
3: \\u0041\\u030A\\u1E0B\\u0327
48
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
49
4: \\u0041\\u030A\\u1E11\\u0307
50
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
51
5: \\u00C5\\u0064\\u0307\\u0327
52
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
53
6: \\u00C5\\u0064\\u0327\\u0307
54
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
55
7: \\u00C5\\u1E0B\\u0327
56
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
57
8: \\u00C5\\u1E11\\u0307
58
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
59
9: \\u212B\\u0064\\u0307\\u0327
60
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
61
10: \\u212B\\u0064\\u0327\\u0307
62
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
63
11: \\u212B\\u1E0B\\u0327
64
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
65
12: \\u212B\\u1E11\\u0307
66
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
67
*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
68
* since it has not been optimized for that situation.
69
* Note, CanonicalIterator is not intended to be subclassed.
70
* @author M. Davis
71
* @author C++ port by V. Weinstein
72
* @stable ICU 2.4
73
*/
74
class U_COMMON_API CanonicalIterator U_FINAL : public UObject {
75
public:
76
/**
77
* Construct a CanonicalIterator object
78
* @param source string to get results for
79
* @param status Fill-in parameter which receives the status of this operation.
80
* @stable ICU 2.4
81
*/
82
CanonicalIterator(const UnicodeString &source, UErrorCode &status);
83
84
/** Destructor
85
* Cleans pieces
86
* @stable ICU 2.4
87
*/
88
virtual ~CanonicalIterator();
89
90
/**
91
* Gets the NFD form of the current source we are iterating over.
92
* @return gets the source: NOTE: it is the NFD form of source
93
* @stable ICU 2.4
94
*/
95
UnicodeString getSource();
96
97
/**
98
* Resets the iterator so that one can start again from the beginning.
99
* @stable ICU 2.4
100
*/
101
void reset();
102
103
/**
104
* Get the next canonically equivalent string.
105
* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
106
* @return the next string that is canonically equivalent. A bogus string is returned when
107
* the iteration is done.
108
* @stable ICU 2.4
109
*/
110
UnicodeString next();
111
112
/**
113
* Set a new source for this iterator. Allows object reuse.
114
* @param newSource the source string to iterate against. This allows the same iterator to be used
115
* while changing the source string, saving object creation.
116
* @param status Fill-in parameter which receives the status of this operation.
117
* @stable ICU 2.4
118
*/
119
void setSource(const UnicodeString &newSource, UErrorCode &status);
120
121
#ifndef U_HIDE_INTERNAL_API
122
/**
123
* Dumb recursive implementation of permutation.
124
* TODO: optimize
125
* @param source the string to find permutations for
126
* @param skipZeros determine if skip zeros
127
* @param result the results in a set.
128
* @param status Fill-in parameter which receives the status of this operation.
129
* @internal
130
*/
131
static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
132
#endif /* U_HIDE_INTERNAL_API */
133
134
/**
135
* ICU "poor man's RTTI", returns a UClassID for this class.
136
*
137
* @stable ICU 2.2
138
*/
139
static UClassID U_EXPORT2 getStaticClassID();
140
141
/**
142
* ICU "poor man's RTTI", returns a UClassID for the actual class.
143
*
144
* @stable ICU 2.2
145
*/
146
virtual UClassID getDynamicClassID() const;
147
148
private:
149
// ===================== PRIVATES ==============================
150
// private default constructor
151
CanonicalIterator();
152
153
154
/**
155
* Copy constructor. Private for now.
156
* @internal (private)
157
*/
158
CanonicalIterator(const CanonicalIterator& other);
159
160
/**
161
* Assignment operator. Private for now.
162
* @internal (private)
163
*/
164
CanonicalIterator& operator=(const CanonicalIterator& other);
165
166
// fields
167
UnicodeString source;
168
UBool done;
169
170
// 2 dimensional array holds the pieces of the string with
171
// their different canonically equivalent representations
172
UnicodeString **pieces;
173
int32_t pieces_length;
174
int32_t *pieces_lengths;
175
176
// current is used in iterating to combine pieces
177
int32_t *current;
178
int32_t current_length;
179
180
// transient fields
181
UnicodeString buffer;
182
183
const Normalizer2 &nfd;
184
const Normalizer2Impl &nfcImpl;
185
186
// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
187
UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
188
189
//Set getEquivalents2(String segment);
190
Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
191
//Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
192
193
/**
194
* See if the decomposition of cp2 is at segment starting at segmentPos
195
* (with canonical rearrangment!)
196
* If so, take the remainder, and return the equivalents
197
*/
198
//Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
199
Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
200
//Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
201
202
void cleanPieces();
203
204
};
205
206
U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_NORMALIZATION */

#endif /* CANITER_H */