CoCalc -- CollationElementIterator.java

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/text/CollationElementIterator.java
³⁸⁸²⁹ views
1
/*
2
 * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
/*
27
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29
 *
30
 *   The original version of this source code and documentation is copyrighted
31
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32
 * materials are provided under terms of a License Agreement between Taligent
33
 * and Sun. This technology is protected by multiple US and International
34
 * patents. This notice and attribution to Taligent may not be removed.
35
 *   Taligent is a registered trademark of Taligent, Inc.
36
 *
37
 */
38

39
package java.text;
40

41
import java.lang.Character;
42
import java.util.Vector;
43
import sun.text.CollatorUtilities;
44
import sun.text.normalizer.NormalizerBase;
45

46
/**
47
 * The <code>CollationElementIterator</code> class is used as an iterator
48
 * to walk through each character of an international string. Use the iterator
49
 * to return the ordering priority of the positioned character. The ordering
50
 * priority of a character, which we refer to as a key, defines how a character
51
 * is collated in the given collation object.
52
 *
53
 * <p>
54
 * For example, consider the following in Spanish:
55
 * <blockquote>
56
 * <pre>
57
 * "ca" &rarr; the first key is key('c') and second key is key('a').
58
 * "cha" &rarr; the first key is key('ch') and second key is key('a').
59
 * </pre>
60
 * </blockquote>
61
 * And in German,
62
 * <blockquote>
63
 * <pre>
64
 * "\u00e4b" &rarr; the first key is key('a'), the second key is key('e'), and
65
 * the third key is key('b').
66
 * </pre>
67
 * </blockquote>
68
 * The key of a character is an integer composed of primary order(short),
69
 * secondary order(byte), and tertiary order(byte). Java strictly defines
70
 * the size and signedness of its primitive data types. Therefore, the static
71
 * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
72
 * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
73
 * and <code>short</code> respectively to ensure the correctness of the key
74
 * value.
75
 *
76
 * <p>
77
 * Example of the iterator usage,
78
 * <blockquote>
79
 * <pre>
80
 *
81
 *  String testString = "This is a test";
82
 *  Collator col = Collator.getInstance();
83
 *  if (col instanceof RuleBasedCollator) {
84
 *      RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
85
 *      CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
86
 *      int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
87
 *          :
88
 *  }
89
 * </pre>
90
 * </blockquote>
91
 *
92
 * <p>
93
 * <code>CollationElementIterator.next</code> returns the collation order
94
 * of the next character. A collation order consists of primary order,
95
 * secondary order and tertiary order. The data type of the collation
96
 * order is <strong>int</strong>. The first 16 bits of a collation order
97
 * is its primary order; the next 8 bits is the secondary order and the
98
 * last 8 bits is the tertiary order.
99
 *
100
 * <p><b>Note:</b> <code>CollationElementIterator</code> is a part of
101
 * <code>RuleBasedCollator</code> implementation. It is only usable
102
 * with <code>RuleBasedCollator</code> instances.
103
 *
104
 * @see                Collator
105
 * @see                RuleBasedCollator
106
 * @author             Helena Shih, Laura Werner, Richard Gillam
107
 */
108
public final class CollationElementIterator
109
{
110
    /**
111
     * Null order which indicates the end of string is reached by the
112
     * cursor.
113
     */
114
    public final static int NULLORDER = 0xffffffff;
115

116
    /**
117
     * CollationElementIterator constructor.  This takes the source string and
118
     * the collation object.  The cursor will walk thru the source string based
119
     * on the predefined collation rules.  If the source string is empty,
120
     * NULLORDER will be returned on the calls to next().
121
     * @param sourceText the source string.
122
     * @param owner the collation object.
123
     */
124
    CollationElementIterator(String sourceText, RuleBasedCollator owner) {
125
        this.owner = owner;
126
        ordering = owner.getTables();
127
        if ( sourceText.length() != 0 ) {
128
            NormalizerBase.Mode mode =
129
                CollatorUtilities.toNormalizerMode(owner.getDecomposition());
130
            text = new NormalizerBase(sourceText, mode);
131
        }
132
    }
133

134
    /**
135
     * CollationElementIterator constructor.  This takes the source string and
136
     * the collation object.  The cursor will walk thru the source string based
137
     * on the predefined collation rules.  If the source string is empty,
138
     * NULLORDER will be returned on the calls to next().
139
     * @param sourceText the source string.
140
     * @param owner the collation object.
141
     */
142
    CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
143
        this.owner = owner;
144
        ordering = owner.getTables();
145
        NormalizerBase.Mode mode =
146
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
147
        text = new NormalizerBase(sourceText, mode);
148
    }
149

150
    /**
151
     * Resets the cursor to the beginning of the string.  The next call
152
     * to next() will return the first collation element in the string.
153
     */
154
    public void reset()
155
    {
156
        if (text != null) {
157
            text.reset();
158
            NormalizerBase.Mode mode =
159
                CollatorUtilities.toNormalizerMode(owner.getDecomposition());
160
            text.setMode(mode);
161
        }
162
        buffer = null;
163
        expIndex = 0;
164
        swapOrder = 0;
165
    }
166

167
    /**
168
     * Get the next collation element in the string.  <p>This iterator iterates
169
     * over a sequence of collation elements that were built from the string.
170
     * Because there isn't necessarily a one-to-one mapping from characters to
171
     * collation elements, this doesn't mean the same thing as "return the
172
     * collation element [or ordering priority] of the next character in the
173
     * string".</p>
174
     * <p>This function returns the collation element that the iterator is currently
175
     * pointing to and then updates the internal pointer to point to the next element.
176
     * previous() updates the pointer first and then returns the element.  This
177
     * means that when you change direction while iterating (i.e., call next() and
178
     * then call previous(), or call previous() and then call next()), you'll get
179
     * back the same element twice.</p>
180
     *
181
     * @return the next collation element
182
     */
183
    public int next()
184
    {
185
        if (text == null) {
186
            return NULLORDER;
187
        }
188
        NormalizerBase.Mode textMode = text.getMode();
189
        // convert the owner's mode to something the Normalizer understands
190
        NormalizerBase.Mode ownerMode =
191
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
192
        if (textMode != ownerMode) {
193
            text.setMode(ownerMode);
194
        }
195

196
        // if buffer contains any decomposed char values
197
        // return their strength orders before continuing in
198
        // the Normalizer's CharacterIterator.
199
        if (buffer != null) {
200
            if (expIndex < buffer.length) {
201
                return strengthOrder(buffer[expIndex++]);
202
            } else {
203
                buffer = null;
204
                expIndex = 0;
205
            }
206
        } else if (swapOrder != 0) {
207
            if (Character.isSupplementaryCodePoint(swapOrder)) {
208
                char[] chars = Character.toChars(swapOrder);
209
                swapOrder = chars[1];
210
                return chars[0] << 16;
211
            }
212
            int order = swapOrder << 16;
213
            swapOrder = 0;
214
            return order;
215
        }
216
        int ch  = text.next();
217

218
        // are we at the end of Normalizer's text?
219
        if (ch == NormalizerBase.DONE) {
220
            return NULLORDER;
221
        }
222

223
        int value = ordering.getUnicodeOrder(ch);
224
        if (value == RuleBasedCollator.UNMAPPED) {
225
            swapOrder = ch;
226
            return UNMAPPEDCHARVALUE;
227
        }
228
        else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
229
            value = nextContractChar(ch);
230
        }
231
        if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
232
            buffer = ordering.getExpandValueList(value);
233
            expIndex = 0;
234
            value = buffer[expIndex++];
235
        }
236

237
        if (ordering.isSEAsianSwapping()) {
238
            int consonant;
239
            if (isThaiPreVowel(ch)) {
240
                consonant = text.next();
241
                if (isThaiBaseConsonant(consonant)) {
242
                    buffer = makeReorderedBuffer(consonant, value, buffer, true);
243
                    value = buffer[0];
244
                    expIndex = 1;
245
                } else if (consonant != NormalizerBase.DONE) {
246
                    text.previous();
247
                }
248
            }
249
            if (isLaoPreVowel(ch)) {
250
                consonant = text.next();
251
                if (isLaoBaseConsonant(consonant)) {
252
                    buffer = makeReorderedBuffer(consonant, value, buffer, true);
253
                    value = buffer[0];
254
                    expIndex = 1;
255
                } else if (consonant != NormalizerBase.DONE) {
256
                    text.previous();
257
                }
258
            }
259
        }
260

261
        return strengthOrder(value);
262
    }
263

264
    /**
265
     * Get the previous collation element in the string.  <p>This iterator iterates
266
     * over a sequence of collation elements that were built from the string.
267
     * Because there isn't necessarily a one-to-one mapping from characters to
268
     * collation elements, this doesn't mean the same thing as "return the
269
     * collation element [or ordering priority] of the previous character in the
270
     * string".</p>
271
     * <p>This function updates the iterator's internal pointer to point to the
272
     * collation element preceding the one it's currently pointing to and then
273
     * returns that element, while next() returns the current element and then
274
     * updates the pointer.  This means that when you change direction while
275
     * iterating (i.e., call next() and then call previous(), or call previous()
276
     * and then call next()), you'll get back the same element twice.</p>
277
     *
278
     * @return the previous collation element
279
     * @since 1.2
280
     */
281
    public int previous()
282
    {
283
        if (text == null) {
284
            return NULLORDER;
285
        }
286
        NormalizerBase.Mode textMode = text.getMode();
287
        // convert the owner's mode to something the Normalizer understands
288
        NormalizerBase.Mode ownerMode =
289
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
290
        if (textMode != ownerMode) {
291
            text.setMode(ownerMode);
292
        }
293
        if (buffer != null) {
294
            if (expIndex > 0) {
295
                return strengthOrder(buffer[--expIndex]);
296
            } else {
297
                buffer = null;
298
                expIndex = 0;
299
            }
300
        } else if (swapOrder != 0) {
301
            if (Character.isSupplementaryCodePoint(swapOrder)) {
302
                char[] chars = Character.toChars(swapOrder);
303
                swapOrder = chars[1];
304
                return chars[0] << 16;
305
            }
306
            int order = swapOrder << 16;
307
            swapOrder = 0;
308
            return order;
309
        }
310
        int ch = text.previous();
311
        if (ch == NormalizerBase.DONE) {
312
            return NULLORDER;
313
        }
314

315
        int value = ordering.getUnicodeOrder(ch);
316

317
        if (value == RuleBasedCollator.UNMAPPED) {
318
            swapOrder = UNMAPPEDCHARVALUE;
319
            return ch;
320
        } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
321
            value = prevContractChar(ch);
322
        }
323
        if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
324
            buffer = ordering.getExpandValueList(value);
325
            expIndex = buffer.length;
326
            value = buffer[--expIndex];
327
        }
328

329
        if (ordering.isSEAsianSwapping()) {
330
            int vowel;
331
            if (isThaiBaseConsonant(ch)) {
332
                vowel = text.previous();
333
                if (isThaiPreVowel(vowel)) {
334
                    buffer = makeReorderedBuffer(vowel, value, buffer, false);
335
                    expIndex = buffer.length - 1;
336
                    value = buffer[expIndex];
337
                } else {
338
                    text.next();
339
                }
340
            }
341
            if (isLaoBaseConsonant(ch)) {
342
                vowel = text.previous();
343
                if (isLaoPreVowel(vowel)) {
344
                    buffer = makeReorderedBuffer(vowel, value, buffer, false);
345
                    expIndex = buffer.length - 1;
346
                    value = buffer[expIndex];
347
                } else {
348
                    text.next();
349
                }
350
            }
351
        }
352

353
        return strengthOrder(value);
354
    }
355

356
    /**
357
     * Return the primary component of a collation element.
358
     * @param order the collation element
359
     * @return the element's primary component
360
     */
361
    public final static int primaryOrder(int order)
362
    {
363
        order &= RBCollationTables.PRIMARYORDERMASK;
364
        return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
365
    }
366
    /**
367
     * Return the secondary component of a collation element.
368
     * @param order the collation element
369
     * @return the element's secondary component
370
     */
371
    public final static short secondaryOrder(int order)
372
    {
373
        order = order & RBCollationTables.SECONDARYORDERMASK;
374
        return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
375
    }
376
    /**
377
     * Return the tertiary component of a collation element.
378
     * @param order the collation element
379
     * @return the element's tertiary component
380
     */
381
    public final static short tertiaryOrder(int order)
382
    {
383
        return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
384
    }
385

386
    /**
387
     *  Get the comparison order in the desired strength.  Ignore the other
388
     *  differences.
389
     *  @param order The order value
390
     */
391
    final int strengthOrder(int order)
392
    {
393
        int s = owner.getStrength();
394
        if (s == Collator.PRIMARY)
395
        {
396
            order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
397
        } else if (s == Collator.SECONDARY)
398
        {
399
            order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
400
        }
401
        return order;
402
    }
403

404
    /**
405
     * Sets the iterator to point to the collation element corresponding to
406
     * the specified character (the parameter is a CHARACTER offset in the
407
     * original string, not an offset into its corresponding sequence of
408
     * collation elements).  The value returned by the next call to next()
409
     * will be the collation element corresponding to the specified position
410
     * in the text.  If that position is in the middle of a contracting
411
     * character sequence, the result of the next call to next() is the
412
     * collation element for that sequence.  This means that getOffset()
413
     * is not guaranteed to return the same value as was passed to a preceding
414
     * call to setOffset().
415
     *
416
     * @param newOffset The new character offset into the original text.
417
     * @since 1.2
418
     */
419
    @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
420
    public void setOffset(int newOffset)
421
    {
422
        if (text != null) {
423
            if (newOffset < text.getBeginIndex()
424
                || newOffset >= text.getEndIndex()) {
425
                    text.setIndexOnly(newOffset);
426
            } else {
427
                int c = text.setIndex(newOffset);
428

429
                // if the desired character isn't used in a contracting character
430
                // sequence, bypass all the backing-up logic-- we're sitting on
431
                // the right character already
432
                if (ordering.usedInContractSeq(c)) {
433
                    // walk backwards through the string until we see a character
434
                    // that DOESN'T participate in a contracting character sequence
435
                    while (ordering.usedInContractSeq(c)) {
436
                        c = text.previous();
437
                    }
438
                    // now walk forward using this object's next() method until
439
                    // we pass the starting point and set our current position
440
                    // to the beginning of the last "character" before or at
441
                    // our starting position
442
                    int last = text.getIndex();
443
                    while (text.getIndex() <= newOffset) {
444
                        last = text.getIndex();
445
                        next();
446
                    }
447
                    text.setIndexOnly(last);
448
                    // we don't need this, since last is the last index
449
                    // that is the starting of the contraction which encompass
450
                    // newOffset
451
                    // text.previous();
452
                }
453
            }
454
        }
455
        buffer = null;
456
        expIndex = 0;
457
        swapOrder = 0;
458
    }
459

460
    /**
461
     * Returns the character offset in the original text corresponding to the next
462
     * collation element.  (That is, getOffset() returns the position in the text
463
     * corresponding to the collation element that will be returned by the next
464
     * call to next().)  This value will always be the index of the FIRST character
465
     * corresponding to the collation element (a contracting character sequence is
466
     * when two or more characters all correspond to the same collation element).
467
     * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
468
     * won't necessarily return x.
469
     *
470
     * @return The character offset in the original text corresponding to the collation
471
     * element that will be returned by the next call to next().
472
     * @since 1.2
473
     */
474
    public int getOffset()
475
    {
476
        return (text != null) ? text.getIndex() : 0;
477
    }
478

479

480
    /**
481
     * Return the maximum length of any expansion sequences that end
482
     * with the specified comparison order.
483
     * @param order a collation order returned by previous or next.
484
     * @return the maximum length of any expansion sequences ending
485
     *         with the specified order.
486
     * @since 1.2
487
     */
488
    public int getMaxExpansion(int order)
489
    {
490
        return ordering.getMaxExpansion(order);
491
    }
492

493
    /**
494
     * Set a new string over which to iterate.
495
     *
496
     * @param source  the new source text
497
     * @since 1.2
498
     */
499
    public void setText(String source)
500
    {
501
        buffer = null;
502
        swapOrder = 0;
503
        expIndex = 0;
504
        NormalizerBase.Mode mode =
505
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
506
        if (text == null) {
507
            text = new NormalizerBase(source, mode);
508
        } else {
509
            text.setMode(mode);
510
            text.setText(source);
511
        }
512
    }
513

514
    /**
515
     * Set a new string over which to iterate.
516
     *
517
     * @param source  the new source text.
518
     * @since 1.2
519
     */
520
    public void setText(CharacterIterator source)
521
    {
522
        buffer = null;
523
        swapOrder = 0;
524
        expIndex = 0;
525
        NormalizerBase.Mode mode =
526
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
527
        if (text == null) {
528
            text = new NormalizerBase(source, mode);
529
        } else {
530
            text.setMode(mode);
531
            text.setText(source);
532
        }
533
    }
534

535
    //============================================================
536
    // privates
537
    //============================================================
538

539
    /**
540
     * Determine if a character is a Thai vowel (which sorts after
541
     * its base consonant).
542
     */
543
    private final static boolean isThaiPreVowel(int ch) {
544
        return (ch >= 0x0e40) && (ch <= 0x0e44);
545
    }
546

547
    /**
548
     * Determine if a character is a Thai base consonant
549
     */
550
    private final static boolean isThaiBaseConsonant(int ch) {
551
        return (ch >= 0x0e01) && (ch <= 0x0e2e);
552
    }
553

554
    /**
555
     * Determine if a character is a Lao vowel (which sorts after
556
     * its base consonant).
557
     */
558
    private final static boolean isLaoPreVowel(int ch) {
559
        return (ch >= 0x0ec0) && (ch <= 0x0ec4);
560
    }
561

562
    /**
563
     * Determine if a character is a Lao base consonant
564
     */
565
    private final static boolean isLaoBaseConsonant(int ch) {
566
        return (ch >= 0x0e81) && (ch <= 0x0eae);
567
    }
568

569
    /**
570
     * This method produces a buffer which contains the collation
571
     * elements for the two characters, with colFirst's values preceding
572
     * another character's.  Presumably, the other character precedes colFirst
573
     * in logical order (otherwise you wouldn't need this method would you?).
574
     * The assumption is that the other char's value(s) have already been
575
     * computed.  If this char has a single element it is passed to this
576
     * method as lastValue, and lastExpansion is null.  If it has an
577
     * expansion it is passed in lastExpansion, and colLastValue is ignored.
578
     */
579
    private int[] makeReorderedBuffer(int colFirst,
580
                                      int lastValue,
581
                                      int[] lastExpansion,
582
                                      boolean forward) {
583

584
        int[] result;
585

586
        int firstValue = ordering.getUnicodeOrder(colFirst);
587
        if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
588
            firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
589
        }
590

591
        int[] firstExpansion = null;
592
        if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
593
            firstExpansion = ordering.getExpandValueList(firstValue);
594
        }
595

596
        if (!forward) {
597
            int temp1 = firstValue;
598
            firstValue = lastValue;
599
            lastValue = temp1;
600
            int[] temp2 = firstExpansion;
601
            firstExpansion = lastExpansion;
602
            lastExpansion = temp2;
603
        }
604

605
        if (firstExpansion == null && lastExpansion == null) {
606
            result = new int [2];
607
            result[0] = firstValue;
608
            result[1] = lastValue;
609
        }
610
        else {
611
            int firstLength = firstExpansion==null? 1 : firstExpansion.length;
612
            int lastLength = lastExpansion==null? 1 : lastExpansion.length;
613
            result = new int[firstLength + lastLength];
614

615
            if (firstExpansion == null) {
616
                result[0] = firstValue;
617
            }
618
            else {
619
                System.arraycopy(firstExpansion, 0, result, 0, firstLength);
620
            }
621

622
            if (lastExpansion == null) {
623
                result[firstLength] = lastValue;
624
            }
625
            else {
626
                System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
627
            }
628
        }
629

630
        return result;
631
    }
632

633
    /**
634
     *  Check if a comparison order is ignorable.
635
     *  @return true if a character is ignorable, false otherwise.
636
     */
637
    final static boolean isIgnorable(int order)
638
    {
639
        return ((primaryOrder(order) == 0) ? true : false);
640
    }
641

642
    /**
643
     * Get the ordering priority of the next contracting character in the
644
     * string.
645
     * @param ch the starting character of a contracting character token
646
     * @return the next contracting character's ordering.  Returns NULLORDER
647
     * if the end of string is reached.
648
     */
649
    private int nextContractChar(int ch)
650
    {
651
        // First get the ordering of this single character,
652
        // which is always the first element in the list
653
        Vector<EntryPair> list = ordering.getContractValues(ch);
654
        EntryPair pair = list.firstElement();
655
        int order = pair.value;
656

657
        // find out the length of the longest contracting character sequence in the list.
658
        // There's logic in the builder code to make sure the longest sequence is always
659
        // the last.
660
        pair = list.lastElement();
661
        int maxLength = pair.entryName.length();
662

663
        // (the Normalizer is cloned here so that the seeking we do in the next loop
664
        // won't affect our real position in the text)
665
        NormalizerBase tempText = (NormalizerBase)text.clone();
666

667
        // extract the next maxLength characters in the string (we have to do this using the
668
        // Normalizer to ensure that our offsets correspond to those the rest of the
669
        // iterator is using) and store it in "fragment".
670
        tempText.previous();
671
        key.setLength(0);
672
        int c = tempText.next();
673
        while (maxLength > 0 && c != NormalizerBase.DONE) {
674
            if (Character.isSupplementaryCodePoint(c)) {
675
                key.append(Character.toChars(c));
676
                maxLength -= 2;
677
            } else {
678
                key.append((char)c);
679
                --maxLength;
680
            }
681
            c = tempText.next();
682
        }
683
        String fragment = key.toString();
684
        // now that we have that fragment, iterate through this list looking for the
685
        // longest sequence that matches the characters in the actual text.  (maxLength
686
        // is used here to keep track of the length of the longest sequence)
687
        // Upon exit from this loop, maxLength will contain the length of the matching
688
        // sequence and order will contain the collation-element value corresponding
689
        // to this sequence
690
        maxLength = 1;
691
        for (int i = list.size() - 1; i > 0; i--) {
692
            pair = list.elementAt(i);
693
            if (!pair.fwd)
694
                continue;
695

696
            if (fragment.startsWith(pair.entryName) && pair.entryName.length()
697
                    > maxLength) {
698
                maxLength = pair.entryName.length();
699
                order = pair.value;
700
            }
701
        }
702

703
        // seek our current iteration position to the end of the matching sequence
704
        // and return the appropriate collation-element value (if there was no matching
705
        // sequence, we're already seeked to the right position and order already contains
706
        // the correct collation-element value for the single character)
707
        while (maxLength > 1) {
708
            c = text.next();
709
            maxLength -= Character.charCount(c);
710
        }
711
        return order;
712
    }
713

714
    /**
715
     * Get the ordering priority of the previous contracting character in the
716
     * string.
717
     * @param ch the starting character of a contracting character token
718
     * @return the next contracting character's ordering.  Returns NULLORDER
719
     * if the end of string is reached.
720
     */
721
    private int prevContractChar(int ch)
722
    {
723
        // This function is identical to nextContractChar(), except that we've
724
        // switched things so that the next() and previous() calls on the Normalizer
725
        // are switched and so that we skip entry pairs with the fwd flag turned on
726
        // rather than off.  Notice that we still use append() and startsWith() when
727
        // working on the fragment.  This is because the entry pairs that are used
728
        // in reverse iteration have their names reversed already.
729
        Vector<EntryPair> list = ordering.getContractValues(ch);
730
        EntryPair pair = list.firstElement();
731
        int order = pair.value;
732

733
        pair = list.lastElement();
734
        int maxLength = pair.entryName.length();
735

736
        NormalizerBase tempText = (NormalizerBase)text.clone();
737

738
        tempText.next();
739
        key.setLength(0);
740
        int c = tempText.previous();
741
        while (maxLength > 0 && c != NormalizerBase.DONE) {
742
            if (Character.isSupplementaryCodePoint(c)) {
743
                key.append(Character.toChars(c));
744
                maxLength -= 2;
745
            } else {
746
                key.append((char)c);
747
                --maxLength;
748
            }
749
            c = tempText.previous();
750
        }
751
        String fragment = key.toString();
752

753
        maxLength = 1;
754
        for (int i = list.size() - 1; i > 0; i--) {
755
            pair = list.elementAt(i);
756
            if (pair.fwd)
757
                continue;
758

759
            if (fragment.startsWith(pair.entryName) && pair.entryName.length()
760
                    > maxLength) {
761
                maxLength = pair.entryName.length();
762
                order = pair.value;
763
            }
764
        }
765

766
        while (maxLength > 1) {
767
            c = text.previous();
768
            maxLength -= Character.charCount(c);
769
        }
770
        return order;
771
    }
772

773
    final static int UNMAPPEDCHARVALUE = 0x7FFF0000;
774

775
    private NormalizerBase text = null;
776
    private int[] buffer = null;
777
    private int expIndex = 0;
778
    private StringBuffer key = new StringBuffer(5);
779
    private int swapOrder = 0;
780
    private RBCollationTables ordering;
781
    private RuleBasedCollator owner;
782
}
783

784
Product

Resources

Company