Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/text/normalizer/CharTrie.java
38830 views
1
/*
2
* Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
/*
26
*******************************************************************************
27
* (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
28
* *
29
* The original version of this source code and documentation is copyrighted *
30
* and owned by IBM, These materials are provided under terms of a License *
31
* Agreement between IBM and Sun. This technology is protected by multiple *
32
* US and International patents. This notice and attribution to IBM may not *
33
* to removed. *
34
*******************************************************************************
35
*/
36
37
package sun.text.normalizer;
38
39
import java.io.InputStream;
40
import java.io.DataInputStream;
41
import java.io.IOException;
42
43
/**
44
* Trie implementation which stores data in char, 16 bits.
45
* @author synwee
46
* @see com.ibm.icu.impl.Trie
47
* @since release 2.1, Jan 01 2002
48
*/
49
50
// note that i need to handle the block calculations later, since chartrie
51
// in icu4c uses the same index array.
52
public class CharTrie extends Trie
53
{
54
// public constructors ---------------------------------------------
55
56
/**
57
* <p>Creates a new Trie with the settings for the trie data.</p>
58
* <p>Unserialize the 32-bit-aligned input stream and use the data for the
59
* trie.</p>
60
* @param inputStream file input stream to a ICU data file, containing
61
* the trie
62
* @param dataManipulate object which provides methods to parse the char
63
* data
64
* @throws IOException thrown when data reading fails
65
* @draft 2.1
66
*/
67
public CharTrie(InputStream inputStream,
68
DataManipulate dataManipulate) throws IOException
69
{
70
super(inputStream, dataManipulate);
71
72
if (!isCharTrie()) {
73
throw new IllegalArgumentException(
74
"Data given does not belong to a char trie.");
75
}
76
m_friendAgent_ = new FriendAgent();
77
}
78
79
/**
80
* Make a dummy CharTrie.
81
* A dummy trie is an empty runtime trie, used when a real data trie cannot
82
* be loaded.
83
*
84
* The trie always returns the initialValue,
85
* or the leadUnitValue for lead surrogate code points.
86
* The Latin-1 part is always set up to be linear.
87
*
88
* @param initialValue the initial value that is set for all code points
89
* @param leadUnitValue the value for lead surrogate code _units_ that do not
90
* have associated supplementary data
91
* @param dataManipulate object which provides methods to parse the char data
92
*/
93
public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
94
super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
95
96
int dataLength, latin1Length, i, limit;
97
char block;
98
99
/* calculate the actual size of the dummy trie data */
100
101
/* max(Latin-1, block 0) */
102
dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
103
if(leadUnitValue!=initialValue) {
104
dataLength+=DATA_BLOCK_LENGTH;
105
}
106
m_data_=new char[dataLength];
107
m_dataLength_=dataLength;
108
109
m_initialValue_=(char)initialValue;
110
111
/* fill the index and data arrays */
112
113
/* indexes are preset to 0 (block 0) */
114
115
/* Latin-1 data */
116
for(i=0; i<latin1Length; ++i) {
117
m_data_[i]=(char)initialValue;
118
}
119
120
if(leadUnitValue!=initialValue) {
121
/* indexes for lead surrogate code units to the block after Latin-1 */
122
block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
123
i=0xd800>>INDEX_STAGE_1_SHIFT_;
124
limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
125
for(; i<limit; ++i) {
126
m_index_[i]=block;
127
}
128
129
/* data for lead surrogate code units */
130
limit=latin1Length+DATA_BLOCK_LENGTH;
131
for(i=latin1Length; i<limit; ++i) {
132
m_data_[i]=(char)leadUnitValue;
133
}
134
}
135
136
m_friendAgent_ = new FriendAgent();
137
}
138
139
/**
140
* Java friend implementation
141
*/
142
public class FriendAgent
143
{
144
/**
145
* Gives out the index array of the trie
146
* @return index array of trie
147
*/
148
public char[] getPrivateIndex()
149
{
150
return m_index_;
151
}
152
/**
153
* Gives out the data array of the trie
154
* @return data array of trie
155
*/
156
public char[] getPrivateData()
157
{
158
return m_data_;
159
}
160
/**
161
* Gives out the data offset in the trie
162
* @return data offset in the trie
163
*/
164
public int getPrivateInitialValue()
165
{
166
return m_initialValue_;
167
}
168
}
169
170
// public methods --------------------------------------------------
171
172
/**
173
* Java friend implementation
174
* To store the index and data array into the argument.
175
* @param friend java friend UCharacterProperty object to store the array
176
*/
177
public void putIndexData(UCharacterProperty friend)
178
{
179
friend.setIndexData(m_friendAgent_);
180
}
181
182
/**
183
* Gets the value associated with the codepoint.
184
* If no value is associated with the codepoint, a default value will be
185
* returned.
186
* @param ch codepoint
187
* @return offset to data
188
* @draft 2.1
189
*/
190
public final char getCodePointValue(int ch)
191
{
192
int offset;
193
194
// fastpath for U+0000..U+D7FF
195
if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
196
// copy of getRawOffset()
197
offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
198
+ (ch & INDEX_STAGE_3_MASK_);
199
return m_data_[offset];
200
}
201
202
// handle U+D800..U+10FFFF
203
offset = getCodePointOffset(ch);
204
205
// return -1 if there is an error, in this case we return the default
206
// value: m_initialValue_
207
return (offset >= 0) ? m_data_[offset] : m_initialValue_;
208
}
209
210
/**
211
* Gets the value to the data which this lead surrogate character points
212
* to.
213
* Returned data may contain folding offset information for the next
214
* trailing surrogate character.
215
* This method does not guarantee correct results for trail surrogates.
216
* @param ch lead surrogate character
217
* @return data value
218
* @draft 2.1
219
*/
220
public final char getLeadValue(char ch)
221
{
222
return m_data_[getLeadOffset(ch)];
223
}
224
225
/**
226
* Get the value associated with a pair of surrogates.
227
* @param lead a lead surrogate
228
* @param trail a trail surrogate
229
* @draft 2.1
230
*/
231
public final char getSurrogateValue(char lead, char trail)
232
{
233
int offset = getSurrogateOffset(lead, trail);
234
if (offset > 0) {
235
return m_data_[offset];
236
}
237
return m_initialValue_;
238
}
239
240
/**
241
* <p>Get a value from a folding offset (from the value of a lead surrogate)
242
* and a trail surrogate.</p>
243
* <p>If the
244
* @param leadvalue value associated with the lead surrogate which contains
245
* the folding offset
246
* @param trail surrogate
247
* @return trie data value associated with the trail character
248
* @draft 2.1
249
*/
250
public final char getTrailValue(int leadvalue, char trail)
251
{
252
if (m_dataManipulate_ == null) {
253
throw new NullPointerException(
254
"The field DataManipulate in this Trie is null");
255
}
256
int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
257
if (offset > 0) {
258
return m_data_[getRawOffset(offset,
259
(char)(trail & SURROGATE_MASK_))];
260
}
261
return m_initialValue_;
262
}
263
264
// protected methods -----------------------------------------------
265
266
/**
267
* <p>Parses the input stream and stores its trie content into a index and
268
* data array</p>
269
* @param inputStream data input stream containing trie data
270
* @exception IOException thrown when data reading fails
271
*/
272
protected final void unserialize(InputStream inputStream)
273
throws IOException
274
{
275
DataInputStream input = new DataInputStream(inputStream);
276
int indexDataLength = m_dataOffset_ + m_dataLength_;
277
m_index_ = new char[indexDataLength];
278
for (int i = 0; i < indexDataLength; i ++) {
279
m_index_[i] = input.readChar();
280
}
281
m_data_ = m_index_;
282
m_initialValue_ = m_data_[m_dataOffset_];
283
}
284
285
/**
286
* Gets the offset to the data which the surrogate pair points to.
287
* @param lead lead surrogate
288
* @param trail trailing surrogate
289
* @return offset to data
290
* @draft 2.1
291
*/
292
protected final int getSurrogateOffset(char lead, char trail)
293
{
294
if (m_dataManipulate_ == null) {
295
throw new NullPointerException(
296
"The field DataManipulate in this Trie is null");
297
}
298
299
// get fold position for the next trail surrogate
300
int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
301
302
// get the real data from the folded lead/trail units
303
if (offset > 0) {
304
return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
305
}
306
307
// return -1 if there is an error, in this case we return the default
308
// value: m_initialValue_
309
return -1;
310
}
311
312
/**
313
* Gets the value at the argument index.
314
* For use internally in TrieIterator.
315
* @param index value at index will be retrieved
316
* @return 32 bit value
317
* @see com.ibm.icu.impl.TrieIterator
318
* @draft 2.1
319
*/
320
protected final int getValue(int index)
321
{
322
return m_data_[index];
323
}
324
325
/**
326
* Gets the default initial value
327
* @return 32 bit value
328
* @draft 2.1
329
*/
330
protected final int getInitialValue()
331
{
332
return m_initialValue_;
333
}
334
335
// private data members --------------------------------------------
336
337
/**
338
* Default value
339
*/
340
private char m_initialValue_;
341
/**
342
* Array of char data
343
*/
344
private char m_data_[];
345
/**
346
* Agent for friends
347
*/
348
private FriendAgent m_friendAgent_;
349
}
350
351