Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/text/CollationElementIterator.java
38829 views
1
/*
2
* Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
/*
27
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28
* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29
*
30
* The original version of this source code and documentation is copyrighted
31
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32
* materials are provided under terms of a License Agreement between Taligent
33
* and Sun. This technology is protected by multiple US and International
34
* patents. This notice and attribution to Taligent may not be removed.
35
* Taligent is a registered trademark of Taligent, Inc.
36
*
37
*/
38
39
package java.text;
40
41
import java.lang.Character;
42
import java.util.Vector;
43
import sun.text.CollatorUtilities;
44
import sun.text.normalizer.NormalizerBase;
45
46
/**
47
* The <code>CollationElementIterator</code> class is used as an iterator
48
* to walk through each character of an international string. Use the iterator
49
* to return the ordering priority of the positioned character. The ordering
50
* priority of a character, which we refer to as a key, defines how a character
51
* is collated in the given collation object.
52
*
53
* <p>
54
* For example, consider the following in Spanish:
55
* <blockquote>
56
* <pre>
57
* "ca" &rarr; the first key is key('c') and second key is key('a').
58
* "cha" &rarr; the first key is key('ch') and second key is key('a').
59
* </pre>
60
* </blockquote>
61
* And in German,
62
* <blockquote>
63
* <pre>
64
* "\u00e4b" &rarr; the first key is key('a'), the second key is key('e'), and
65
* the third key is key('b').
66
* </pre>
67
* </blockquote>
68
* The key of a character is an integer composed of primary order(short),
69
* secondary order(byte), and tertiary order(byte). Java strictly defines
70
* the size and signedness of its primitive data types. Therefore, the static
71
* functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
72
* <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
73
* and <code>short</code> respectively to ensure the correctness of the key
74
* value.
75
*
76
* <p>
77
* Example of the iterator usage,
78
* <blockquote>
79
* <pre>
80
*
81
* String testString = "This is a test";
82
* Collator col = Collator.getInstance();
83
* if (col instanceof RuleBasedCollator) {
84
* RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
85
* CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
86
* int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
87
* :
88
* }
89
* </pre>
90
* </blockquote>
91
*
92
* <p>
93
* <code>CollationElementIterator.next</code> returns the collation order
94
* of the next character. A collation order consists of primary order,
95
* secondary order and tertiary order. The data type of the collation
96
* order is <strong>int</strong>. The first 16 bits of a collation order
97
* is its primary order; the next 8 bits is the secondary order and the
98
* last 8 bits is the tertiary order.
99
*
100
* <p><b>Note:</b> <code>CollationElementIterator</code> is a part of
101
* <code>RuleBasedCollator</code> implementation. It is only usable
102
* with <code>RuleBasedCollator</code> instances.
103
*
104
* @see Collator
105
* @see RuleBasedCollator
106
* @author Helena Shih, Laura Werner, Richard Gillam
107
*/
108
public final class CollationElementIterator
109
{
110
/**
111
* Null order which indicates the end of string is reached by the
112
* cursor.
113
*/
114
public final static int NULLORDER = 0xffffffff;
115
116
/**
 * Creates an iterator over the collation elements of a string.
 * The cursor walks through the source string according to the tables of
 * the owning collator. When the source string is empty no normalizer is
 * created, so calls to next() return NULLORDER.
 *
 * @param sourceText the source string.
 * @param owner the collation object.
 */
CollationElementIterator(String sourceText, RuleBasedCollator owner) {
    this.owner = owner;
    this.ordering = owner.getTables();
    if (sourceText.length() == 0) {
        // text stays null; next() and previous() treat that as "no elements"
        return;
    }
    text = new NormalizerBase(
            sourceText,
            CollatorUtilities.toNormalizerMode(owner.getDecomposition()));
}
133
134
/**
 * Creates an iterator over the collation elements of the text exposed by
 * a CharacterIterator. The cursor walks through the source text according
 * to the tables of the owning collator. Unlike the String constructor, a
 * normalizer is always created here, whatever the length of the text.
 *
 * @param sourceText the source text.
 * @param owner the collation object.
 */
CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
    this.owner = owner;
    this.ordering = owner.getTables();
    this.text = new NormalizerBase(
            sourceText,
            CollatorUtilities.toNormalizerMode(owner.getDecomposition()));
}
149
150
/**
151
* Resets the cursor to the beginning of the string. The next call
152
* to next() will return the first collation element in the string.
153
*/
154
public void reset()
155
{
156
if (text != null) {
157
text.reset();
158
NormalizerBase.Mode mode =
159
CollatorUtilities.toNormalizerMode(owner.getDecomposition());
160
text.setMode(mode);
161
}
162
buffer = null;
163
expIndex = 0;
164
swapOrder = 0;
165
}
166
167
/**
168
* Get the next collation element in the string. <p>This iterator iterates
169
* over a sequence of collation elements that were built from the string.
170
* Because there isn't necessarily a one-to-one mapping from characters to
171
* collation elements, this doesn't mean the same thing as "return the
172
* collation element [or ordering priority] of the next character in the
173
* string".</p>
174
* <p>This function returns the collation element that the iterator is currently
175
* pointing to and then updates the internal pointer to point to the next element.
176
* previous() updates the pointer first and then returns the element. This
177
* means that when you change direction while iterating (i.e., call next() and
178
* then call previous(), or call previous() and then call next()), you'll get
179
* back the same element twice.</p>
180
*
181
* @return the next collation element
182
*/
183
public int next()
184
{
185
if (text == null) {
186
return NULLORDER;
187
}
188
NormalizerBase.Mode textMode = text.getMode();
189
// convert the owner's mode to something the Normalizer understands
190
NormalizerBase.Mode ownerMode =
191
CollatorUtilities.toNormalizerMode(owner.getDecomposition());
192
if (textMode != ownerMode) {
193
text.setMode(ownerMode);
194
}
195
196
// if buffer contains any decomposed char values
197
// return their strength orders before continuing in
198
// the Normalizer's CharacterIterator.
199
if (buffer != null) {
200
if (expIndex < buffer.length) {
201
return strengthOrder(buffer[expIndex++]);
202
} else {
203
buffer = null;
204
expIndex = 0;
205
}
206
} else if (swapOrder != 0) {
207
if (Character.isSupplementaryCodePoint(swapOrder)) {
208
char[] chars = Character.toChars(swapOrder);
209
swapOrder = chars[1];
210
return chars[0] << 16;
211
}
212
int order = swapOrder << 16;
213
swapOrder = 0;
214
return order;
215
}
216
int ch = text.next();
217
218
// are we at the end of Normalizer's text?
219
if (ch == NormalizerBase.DONE) {
220
return NULLORDER;
221
}
222
223
int value = ordering.getUnicodeOrder(ch);
224
if (value == RuleBasedCollator.UNMAPPED) {
225
swapOrder = ch;
226
return UNMAPPEDCHARVALUE;
227
}
228
else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
229
value = nextContractChar(ch);
230
}
231
if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
232
buffer = ordering.getExpandValueList(value);
233
expIndex = 0;
234
value = buffer[expIndex++];
235
}
236
237
if (ordering.isSEAsianSwapping()) {
238
int consonant;
239
if (isThaiPreVowel(ch)) {
240
consonant = text.next();
241
if (isThaiBaseConsonant(consonant)) {
242
buffer = makeReorderedBuffer(consonant, value, buffer, true);
243
value = buffer[0];
244
expIndex = 1;
245
} else if (consonant != NormalizerBase.DONE) {
246
text.previous();
247
}
248
}
249
if (isLaoPreVowel(ch)) {
250
consonant = text.next();
251
if (isLaoBaseConsonant(consonant)) {
252
buffer = makeReorderedBuffer(consonant, value, buffer, true);
253
value = buffer[0];
254
expIndex = 1;
255
} else if (consonant != NormalizerBase.DONE) {
256
text.previous();
257
}
258
}
259
}
260
261
return strengthOrder(value);
262
}
263
264
/**
265
* Get the previous collation element in the string. <p>This iterator iterates
266
* over a sequence of collation elements that were built from the string.
267
* Because there isn't necessarily a one-to-one mapping from characters to
268
* collation elements, this doesn't mean the same thing as "return the
269
* collation element [or ordering priority] of the previous character in the
270
* string".</p>
271
* <p>This function updates the iterator's internal pointer to point to the
272
* collation element preceding the one it's currently pointing to and then
273
* returns that element, while next() returns the current element and then
274
* updates the pointer. This means that when you change direction while
275
* iterating (i.e., call next() and then call previous(), or call previous()
276
* and then call next()), you'll get back the same element twice.</p>
277
*
278
* @return the previous collation element
279
* @since 1.2
280
*/
281
public int previous()
282
{
283
if (text == null) {
284
return NULLORDER;
285
}
286
NormalizerBase.Mode textMode = text.getMode();
287
// convert the owner's mode to something the Normalizer understands
288
NormalizerBase.Mode ownerMode =
289
CollatorUtilities.toNormalizerMode(owner.getDecomposition());
290
if (textMode != ownerMode) {
291
text.setMode(ownerMode);
292
}
293
if (buffer != null) {
294
if (expIndex > 0) {
295
return strengthOrder(buffer[--expIndex]);
296
} else {
297
buffer = null;
298
expIndex = 0;
299
}
300
} else if (swapOrder != 0) {
301
if (Character.isSupplementaryCodePoint(swapOrder)) {
302
char[] chars = Character.toChars(swapOrder);
303
swapOrder = chars[1];
304
return chars[0] << 16;
305
}
306
int order = swapOrder << 16;
307
swapOrder = 0;
308
return order;
309
}
310
int ch = text.previous();
311
if (ch == NormalizerBase.DONE) {
312
return NULLORDER;
313
}
314
315
int value = ordering.getUnicodeOrder(ch);
316
317
if (value == RuleBasedCollator.UNMAPPED) {
318
swapOrder = UNMAPPEDCHARVALUE;
319
return ch;
320
} else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
321
value = prevContractChar(ch);
322
}
323
if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
324
buffer = ordering.getExpandValueList(value);
325
expIndex = buffer.length;
326
value = buffer[--expIndex];
327
}
328
329
if (ordering.isSEAsianSwapping()) {
330
int vowel;
331
if (isThaiBaseConsonant(ch)) {
332
vowel = text.previous();
333
if (isThaiPreVowel(vowel)) {
334
buffer = makeReorderedBuffer(vowel, value, buffer, false);
335
expIndex = buffer.length - 1;
336
value = buffer[expIndex];
337
} else {
338
text.next();
339
}
340
}
341
if (isLaoBaseConsonant(ch)) {
342
vowel = text.previous();
343
if (isLaoPreVowel(vowel)) {
344
buffer = makeReorderedBuffer(vowel, value, buffer, false);
345
expIndex = buffer.length - 1;
346
value = buffer[expIndex];
347
} else {
348
text.next();
349
}
350
}
351
}
352
353
return strengthOrder(value);
354
}
355
356
/**
357
* Return the primary component of a collation element.
358
* @param order the collation element
359
* @return the element's primary component
360
*/
361
public final static int primaryOrder(int order)
362
{
363
order &= RBCollationTables.PRIMARYORDERMASK;
364
return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
365
}
366
/**
367
* Return the secondary component of a collation element.
368
* @param order the collation element
369
* @return the element's secondary component
370
*/
371
public final static short secondaryOrder(int order)
372
{
373
order = order & RBCollationTables.SECONDARYORDERMASK;
374
return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
375
}
376
/**
377
* Return the tertiary component of a collation element.
378
* @param order the collation element
379
* @return the element's tertiary component
380
*/
381
public final static short tertiaryOrder(int order)
382
{
383
return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
384
}
385
386
/**
387
* Get the comparison order in the desired strength. Ignore the other
388
* differences.
389
* @param order The order value
390
*/
391
final int strengthOrder(int order)
392
{
393
int s = owner.getStrength();
394
if (s == Collator.PRIMARY)
395
{
396
order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
397
} else if (s == Collator.SECONDARY)
398
{
399
order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
400
}
401
return order;
402
}
403
404
/**
405
* Sets the iterator to point to the collation element corresponding to
406
* the specified character (the parameter is a CHARACTER offset in the
407
* original string, not an offset into its corresponding sequence of
408
* collation elements). The value returned by the next call to next()
409
* will be the collation element corresponding to the specified position
410
* in the text. If that position is in the middle of a contracting
411
* character sequence, the result of the next call to next() is the
412
* collation element for that sequence. This means that getOffset()
413
* is not guaranteed to return the same value as was passed to a preceding
414
* call to setOffset().
415
*
416
* @param newOffset The new character offset into the original text.
417
* @since 1.2
418
*/
419
@SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
420
public void setOffset(int newOffset)
421
{
422
if (text != null) {
423
if (newOffset < text.getBeginIndex()
424
|| newOffset >= text.getEndIndex()) {
425
text.setIndexOnly(newOffset);
426
} else {
427
int c = text.setIndex(newOffset);
428
429
// if the desired character isn't used in a contracting character
430
// sequence, bypass all the backing-up logic-- we're sitting on
431
// the right character already
432
if (ordering.usedInContractSeq(c)) {
433
// walk backwards through the string until we see a character
434
// that DOESN'T participate in a contracting character sequence
435
while (ordering.usedInContractSeq(c)) {
436
c = text.previous();
437
}
438
// now walk forward using this object's next() method until
439
// we pass the starting point and set our current position
440
// to the beginning of the last "character" before or at
441
// our starting position
442
int last = text.getIndex();
443
while (text.getIndex() <= newOffset) {
444
last = text.getIndex();
445
next();
446
}
447
text.setIndexOnly(last);
448
// we don't need this, since last is the last index
449
// that is the starting of the contraction which encompass
450
// newOffset
451
// text.previous();
452
}
453
}
454
}
455
buffer = null;
456
expIndex = 0;
457
swapOrder = 0;
458
}
459
460
/**
461
* Returns the character offset in the original text corresponding to the next
462
* collation element. (That is, getOffset() returns the position in the text
463
* corresponding to the collation element that will be returned by the next
464
* call to next().) This value will always be the index of the FIRST character
465
* corresponding to the collation element (a contracting character sequence is
466
* when two or more characters all correspond to the same collation element).
467
* This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
468
* won't necessarily return x.
469
*
470
* @return The character offset in the original text corresponding to the collation
471
* element that will be returned by the next call to next().
472
* @since 1.2
473
*/
474
public int getOffset()
475
{
476
return (text != null) ? text.getIndex() : 0;
477
}
478
479
480
/**
481
* Return the maximum length of any expansion sequences that end
482
* with the specified comparison order.
483
* @param order a collation order returned by previous or next.
484
* @return the maximum length of any expansion sequences ending
485
* with the specified order.
486
* @since 1.2
487
*/
488
public int getMaxExpansion(int order)
489
{
490
return ordering.getMaxExpansion(order);
491
}
492
493
/**
494
* Set a new string over which to iterate.
495
*
496
* @param source the new source text
497
* @since 1.2
498
*/
499
public void setText(String source)
500
{
501
buffer = null;
502
swapOrder = 0;
503
expIndex = 0;
504
NormalizerBase.Mode mode =
505
CollatorUtilities.toNormalizerMode(owner.getDecomposition());
506
if (text == null) {
507
text = new NormalizerBase(source, mode);
508
} else {
509
text.setMode(mode);
510
text.setText(source);
511
}
512
}
513
514
/**
515
* Set a new string over which to iterate.
516
*
517
* @param source the new source text.
518
* @since 1.2
519
*/
520
public void setText(CharacterIterator source)
521
{
522
buffer = null;
523
swapOrder = 0;
524
expIndex = 0;
525
NormalizerBase.Mode mode =
526
CollatorUtilities.toNormalizerMode(owner.getDecomposition());
527
if (text == null) {
528
text = new NormalizerBase(source, mode);
529
} else {
530
text.setMode(mode);
531
text.setText(source);
532
}
533
}
534
535
//============================================================
536
// privates
537
//============================================================
538
539
/**
540
* Determine if a character is a Thai vowel (which sorts after
541
* its base consonant).
542
*/
543
private final static boolean isThaiPreVowel(int ch) {
544
return (ch >= 0x0e40) && (ch <= 0x0e44);
545
}
546
547
/**
548
* Determine if a character is a Thai base consonant
549
*/
550
private final static boolean isThaiBaseConsonant(int ch) {
551
return (ch >= 0x0e01) && (ch <= 0x0e2e);
552
}
553
554
/**
555
* Determine if a character is a Lao vowel (which sorts after
556
* its base consonant).
557
*/
558
private final static boolean isLaoPreVowel(int ch) {
559
return (ch >= 0x0ec0) && (ch <= 0x0ec4);
560
}
561
562
/**
563
* Determine if a character is a Lao base consonant
564
*/
565
private final static boolean isLaoBaseConsonant(int ch) {
566
return (ch >= 0x0e81) && (ch <= 0x0eae);
567
}
568
569
/**
570
* This method produces a buffer which contains the collation
571
* elements for the two characters, with colFirst's values preceding
572
* another character's. Presumably, the other character precedes colFirst
573
* in logical order (otherwise you wouldn't need this method would you?).
574
* The assumption is that the other char's value(s) have already been
575
* computed. If this char has a single element it is passed to this
576
* method as lastValue, and lastExpansion is null. If it has an
577
* expansion it is passed in lastExpansion, and colLastValue is ignored.
578
*/
579
private int[] makeReorderedBuffer(int colFirst,
580
int lastValue,
581
int[] lastExpansion,
582
boolean forward) {
583
584
int[] result;
585
586
int firstValue = ordering.getUnicodeOrder(colFirst);
587
if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
588
firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
589
}
590
591
int[] firstExpansion = null;
592
if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
593
firstExpansion = ordering.getExpandValueList(firstValue);
594
}
595
596
if (!forward) {
597
int temp1 = firstValue;
598
firstValue = lastValue;
599
lastValue = temp1;
600
int[] temp2 = firstExpansion;
601
firstExpansion = lastExpansion;
602
lastExpansion = temp2;
603
}
604
605
if (firstExpansion == null && lastExpansion == null) {
606
result = new int [2];
607
result[0] = firstValue;
608
result[1] = lastValue;
609
}
610
else {
611
int firstLength = firstExpansion==null? 1 : firstExpansion.length;
612
int lastLength = lastExpansion==null? 1 : lastExpansion.length;
613
result = new int[firstLength + lastLength];
614
615
if (firstExpansion == null) {
616
result[0] = firstValue;
617
}
618
else {
619
System.arraycopy(firstExpansion, 0, result, 0, firstLength);
620
}
621
622
if (lastExpansion == null) {
623
result[firstLength] = lastValue;
624
}
625
else {
626
System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
627
}
628
}
629
630
return result;
631
}
632
633
/**
634
* Check if a comparison order is ignorable.
635
* @return true if a character is ignorable, false otherwise.
636
*/
637
final static boolean isIgnorable(int order)
638
{
639
return ((primaryOrder(order) == 0) ? true : false);
640
}
641
642
/**
643
* Get the ordering priority of the next contracting character in the
644
* string.
645
* @param ch the starting character of a contracting character token
646
* @return the next contracting character's ordering. Returns NULLORDER
647
* if the end of string is reached.
648
*/
649
private int nextContractChar(int ch)
650
{
651
// First get the ordering of this single character,
652
// which is always the first element in the list
653
Vector<EntryPair> list = ordering.getContractValues(ch);
654
EntryPair pair = list.firstElement();
655
int order = pair.value;
656
657
// find out the length of the longest contracting character sequence in the list.
658
// There's logic in the builder code to make sure the longest sequence is always
659
// the last.
660
pair = list.lastElement();
661
int maxLength = pair.entryName.length();
662
663
// (the Normalizer is cloned here so that the seeking we do in the next loop
664
// won't affect our real position in the text)
665
NormalizerBase tempText = (NormalizerBase)text.clone();
666
667
// extract the next maxLength characters in the string (we have to do this using the
668
// Normalizer to ensure that our offsets correspond to those the rest of the
669
// iterator is using) and store it in "fragment".
670
tempText.previous();
671
key.setLength(0);
672
int c = tempText.next();
673
while (maxLength > 0 && c != NormalizerBase.DONE) {
674
if (Character.isSupplementaryCodePoint(c)) {
675
key.append(Character.toChars(c));
676
maxLength -= 2;
677
} else {
678
key.append((char)c);
679
--maxLength;
680
}
681
c = tempText.next();
682
}
683
String fragment = key.toString();
684
// now that we have that fragment, iterate through this list looking for the
685
// longest sequence that matches the characters in the actual text. (maxLength
686
// is used here to keep track of the length of the longest sequence)
687
// Upon exit from this loop, maxLength will contain the length of the matching
688
// sequence and order will contain the collation-element value corresponding
689
// to this sequence
690
maxLength = 1;
691
for (int i = list.size() - 1; i > 0; i--) {
692
pair = list.elementAt(i);
693
if (!pair.fwd)
694
continue;
695
696
if (fragment.startsWith(pair.entryName) && pair.entryName.length()
697
> maxLength) {
698
maxLength = pair.entryName.length();
699
order = pair.value;
700
}
701
}
702
703
// seek our current iteration position to the end of the matching sequence
704
// and return the appropriate collation-element value (if there was no matching
705
// sequence, we're already seeked to the right position and order already contains
706
// the correct collation-element value for the single character)
707
while (maxLength > 1) {
708
c = text.next();
709
maxLength -= Character.charCount(c);
710
}
711
return order;
712
}
713
714
/**
715
* Get the ordering priority of the previous contracting character in the
716
* string.
717
* @param ch the starting character of a contracting character token
718
* @return the next contracting character's ordering. Returns NULLORDER
719
* if the end of string is reached.
720
*/
721
private int prevContractChar(int ch)
722
{
723
// This function is identical to nextContractChar(), except that we've
724
// switched things so that the next() and previous() calls on the Normalizer
725
// are switched and so that we skip entry pairs with the fwd flag turned on
726
// rather than off. Notice that we still use append() and startsWith() when
727
// working on the fragment. This is because the entry pairs that are used
728
// in reverse iteration have their names reversed already.
729
Vector<EntryPair> list = ordering.getContractValues(ch);
730
EntryPair pair = list.firstElement();
731
int order = pair.value;
732
733
pair = list.lastElement();
734
int maxLength = pair.entryName.length();
735
736
NormalizerBase tempText = (NormalizerBase)text.clone();
737
738
tempText.next();
739
key.setLength(0);
740
int c = tempText.previous();
741
while (maxLength > 0 && c != NormalizerBase.DONE) {
742
if (Character.isSupplementaryCodePoint(c)) {
743
key.append(Character.toChars(c));
744
maxLength -= 2;
745
} else {
746
key.append((char)c);
747
--maxLength;
748
}
749
c = tempText.previous();
750
}
751
String fragment = key.toString();
752
753
maxLength = 1;
754
for (int i = list.size() - 1; i > 0; i--) {
755
pair = list.elementAt(i);
756
if (pair.fwd)
757
continue;
758
759
if (fragment.startsWith(pair.entryName) && pair.entryName.length()
760
> maxLength) {
761
maxLength = pair.entryName.length();
762
order = pair.value;
763
}
764
}
765
766
while (maxLength > 1) {
767
c = text.previous();
768
maxLength -= Character.charCount(c);
769
}
770
return order;
771
}
772
773
final static int UNMAPPEDCHARVALUE = 0x7FFF0000;
774
775
private NormalizerBase text = null;
776
private int[] buffer = null;
777
private int expIndex = 0;
778
private StringBuffer key = new StringBuffer(5);
779
private int swapOrder = 0;
780
private RBCollationTables ordering;
781
private RuleBasedCollator owner;
782
}
783
784