Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/test/java/text/Collator/Regression.java
47182 views
1
/*
 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
23
24
/**
 * @test
 * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
 *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
 *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
 *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
 * @library /java/text/testlib
 * @summary Regression tests for Collation and associated classes
 */
33
/*
(C) Copyright Taligent, Inc. 1996 - All Rights Reserved
(C) Copyright IBM Corp. 1996 - All Rights Reserved

  The original version of this source code and documentation is copyrighted and
owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
provided under terms of a License Agreement between Taligent and Sun. This
technology is protected by multiple US and International patents. This notice and
attribution to Taligent may not be removed.
  Taligent is a registered trademark of Taligent, Inc.
*/
44
45
import java.text.*;
46
import java.util.Locale;
47
import java.util.Vector;
48
49
50
public class Regression extends CollatorTest {
51
52
public static void main(String[] args) throws Exception {
53
new Regression().run(args);
54
}
55
56
// CollationElementIterator.reset() doesn't work
57
//
58
public void Test4048446() {
59
CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
60
CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
61
62
while ( i1.next() != CollationElementIterator.NULLORDER ) {
63
}
64
i1.reset();
65
66
assertEqual(i1, i2);
67
}
68
69
70
// Collator -> rules -> Collator round-trip broken for expanding characters
71
//
72
public void Test4051866() throws ParseException {
73
// Build a collator containing expanding characters
74
RuleBasedCollator c1 = new RuleBasedCollator("< o "
75
+"& oe ,o\u3080"
76
+"& oe ,\u1530 ,O"
77
+"& OE ,O\u3080"
78
+"& OE ,\u1520"
79
+"< p ,P");
80
81
// Build another using the rules from the first
82
RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
83
84
// Make sure they're the same
85
if (!c1.getRules().equals(c2.getRules())) {
86
errln("Rules are not equal");
87
}
88
}
89
90
// Collator thinks "black-bird" == "black"
91
//
92
public void Test4053636() {
93
if (en_us.equals("black-bird","black")) {
94
errln("black-bird == black");
95
}
96
}
97
98
99
// CollationElementIterator will not work correctly if the associated
100
// Collator object's mode is changed
101
//
102
public void Test4054238() {
103
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
104
105
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
106
CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
107
108
c.setDecomposition(Collator.NO_DECOMPOSITION);
109
CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
110
111
// At this point, BOTH iterators should use NO_DECOMPOSITION, since the
112
// collator itself is in that mode
113
assertEqual(i1, i2);
114
}
115
116
// Collator.IDENTICAL documented but not implemented
117
//
118
public void Test4054734() {
119
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
120
try {
121
c.setStrength(Collator.IDENTICAL);
122
}
123
catch (Exception e) {
124
errln("Caught " + e.toString() + " setting Collator.IDENTICAL");
125
}
126
127
String[] decomp = {
128
"\u0001", "<", "\u0002",
129
"\u0001", "=", "\u0001",
130
"A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
131
"\u00C0", "=", "A\u0300" // Decomp should make these equal
132
};
133
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
134
compareArray(c, decomp);
135
136
String[] nodecomp = {
137
"\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
138
};
139
c.setDecomposition(Collator.NO_DECOMPOSITION);
140
compareArray(c, nodecomp);
141
}
142
143
// Full Decomposition mode not implemented
144
//
145
public void Test4054736() {
146
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
147
c.setDecomposition(Collator.FULL_DECOMPOSITION);
148
149
String[] tests = {
150
"\uFB4f", "=", "\u05D0\u05DC", // Alef-Lamed vs. Alef, Lamed
151
};
152
153
compareArray(c, tests);
154
}
155
156
// Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
157
//
158
public void Test4058613() {
159
// Creating a default collator doesn't work when Korean is the default
160
// locale
161
162
Locale oldDefault = Locale.getDefault();
163
164
Locale.setDefault( Locale.KOREAN );
165
try {
166
Collator c = Collator.getInstance();
167
168
// Since the fix to this bug was to turn of decomposition for Korean collators,
169
// ensure that's what we got
170
if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
171
errln("Decomposition is not set to NO_DECOMPOSITION");
172
}
173
}
174
finally {
175
Locale.setDefault(oldDefault);
176
}
177
}
178
179
// RuleBasedCollator.getRules does not return the exact pattern as input
180
// for expanding character sequences
181
//
182
public void Test4059820() {
183
RuleBasedCollator c = null;
184
try {
185
c = new RuleBasedCollator("< a < b , c/a < d < z");
186
} catch (ParseException e) {
187
errln("Exception building collator: " + e.toString());
188
return;
189
}
190
if ( c.getRules().indexOf("c/a") == -1) {
191
errln("returned rules do not contain 'c/a'");
192
}
193
}
194
195
// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
196
//
197
public void Test4060154() {
198
RuleBasedCollator c = null;
199
try {
200
c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
201
+ " & H < \u0131, \u0130, i, I" );
202
} catch (ParseException e) {
203
errln("Exception building collator: " + e.toString());
204
return;
205
}
206
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
207
208
String[] tertiary = {
209
"A", "<", "B",
210
"H", "<", "\u0131",
211
"H", "<", "I",
212
"\u0131", "<", "\u0130",
213
"\u0130", "<", "i",
214
"\u0130", ">", "H",
215
};
216
c.setStrength(Collator.TERTIARY);
217
compareArray(c, tertiary);
218
219
String[] secondary = {
220
"H", "<", "I",
221
"\u0131", "=", "\u0130",
222
};
223
c.setStrength(Collator.PRIMARY);
224
compareArray(c, secondary);
225
};
226
227
// Secondary/Tertiary comparison incorrect in French Secondary
228
//
229
public void Test4062418() throws ParseException {
230
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
231
c.setStrength(Collator.SECONDARY);
232
233
String[] tests = {
234
"p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
235
};
236
237
compareArray(c, tests);
238
}
239
240
// Collator.compare() method broken if either string contains spaces
241
//
242
public void Test4065540() {
243
if (en_us.compare("abcd e", "abcd f") == 0) {
244
errln("'abcd e' == 'abcd f'");
245
}
246
}
247
248
// Unicode characters need to be recursively decomposed to get the
249
// correct result. For example,
250
// u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
251
//
252
public void Test4066189() {
253
String test1 = "\u1EB1";
254
String test2 = "a\u0306\u0300";
255
256
RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
257
c1.setDecomposition(Collator.FULL_DECOMPOSITION);
258
CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
259
260
RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
261
c2.setDecomposition(Collator.NO_DECOMPOSITION);
262
CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
263
264
assertEqual(i1, i2);
265
}
266
267
// French secondary collation checking at the end of compare iteration fails
268
//
269
public void Test4066696() {
270
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
271
c.setStrength(Collator.SECONDARY);
272
273
String[] tests = {
274
"\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
275
};
276
277
compareArray(c, tests);
278
}
279
280
281
// Bad canonicalization of same-class combining characters
282
//
283
public void Test4076676() {
284
// These combining characters are all in the same class, so they should not
285
// be reordered, and they should compare as unequal.
286
String s1 = "A\u0301\u0302\u0300";
287
String s2 = "A\u0302\u0300\u0301";
288
289
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
290
c.setStrength(Collator.TERTIARY);
291
292
if (c.compare(s1,s2) == 0) {
293
errln("Same-class combining chars were reordered");
294
}
295
}
296
297
298
// RuleBasedCollator.equals(null) throws NullPointerException
299
//
300
public void Test4079231() {
301
try {
302
if (en_us.equals(null)) {
303
errln("en_us.equals(null) returned true");
304
}
305
}
306
catch (Exception e) {
307
errln("en_us.equals(null) threw " + e.toString());
308
}
309
}
310
311
// RuleBasedCollator breaks on "< a < bb" rule
312
//
313
public void Test4078588() throws ParseException {
314
RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
315
316
int result = rbc.compare("a","bb");
317
318
if (result != -1) {
319
errln("Compare(a,bb) returned " + result + "; expected -1");
320
}
321
}
322
323
// Combining characters in different classes not reordered properly.
324
//
325
public void Test4081866() throws ParseException {
326
// These combining characters are all in different classes,
327
// so they should be reordered and the strings should compare as equal.
328
String s1 = "A\u0300\u0316\u0327\u0315";
329
String s2 = "A\u0327\u0316\u0315\u0300";
330
331
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
332
c.setStrength(Collator.TERTIARY);
333
334
// Now that the default collators are set to NO_DECOMPOSITION
335
// (as a result of fixing bug 4114077), we must set it explicitly
336
// when we're testing reordering behavior. -- lwerner, 5/5/98
337
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
338
339
if (c.compare(s1,s2) != 0) {
340
errln("Combining chars were not reordered");
341
}
342
}
343
344
// string comparison errors in Scandinavian collators
345
//
346
public void Test4087241() {
347
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
348
new Locale("da", "DK"));
349
c.setStrength(Collator.SECONDARY);
350
351
String[] tests = {
352
"\u007a", "<", "\u00e6", // z < ae
353
"a\u0308", "<", "a\u030a", // a-unlaut < a-ring
354
"Y", "<", "u\u0308", // Y < u-umlaut
355
};
356
357
compareArray(c, tests);
358
}
359
360
// CollationKey takes ignorable strings into account when it shouldn't
361
//
362
public void Test4087243() {
363
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
364
c.setStrength(Collator.TERTIARY);
365
366
String[] tests = {
367
"123", "=", "123\u0001", // 1 2 3 = 1 2 3 ctrl-A
368
};
369
370
compareArray(c, tests);
371
}
372
373
// Mu/micro conflict
374
// Micro symbol and greek lowercase letter Mu should sort identically
375
//
376
public void Test4092260() {
377
Collator c = Collator.getInstance(new Locale("el", ""));
378
379
// will only be equal when FULL_DECOMPOSITION is used
380
c.setDecomposition(Collator.FULL_DECOMPOSITION);
381
382
String[] tests = {
383
"\u00B5", "=", "\u03BC",
384
};
385
386
compareArray(c, tests);
387
}
388
389
void Test4095316() {
390
Collator c = Collator.getInstance(new Locale("el", "GR"));
391
c.setStrength(Collator.TERTIARY);
392
// javadocs for RuleBasedCollator clearly specify that characters containing compatability
393
// chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
394
c.setDecomposition(Collator.FULL_DECOMPOSITION);
395
396
String[] tests = {
397
"\u03D4", "=", "\u03AB",
398
};
399
400
compareArray(c, tests);
401
}
402
403
public void Test4101940() {
404
try {
405
RuleBasedCollator c = new RuleBasedCollator("< a < b");
406
CollationElementIterator i = c.getCollationElementIterator("");
407
i.reset();
408
409
if (i.next() != i.NULLORDER) {
410
errln("next did not return NULLORDER");
411
}
412
}
413
catch (Exception e) {
414
errln("Caught " + e );
415
}
416
}
417
418
// Collator.compare not handling spaces properly
419
//
420
public void Test4103436() {
421
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
422
c.setStrength(Collator.TERTIARY);
423
424
String[] tests = {
425
"file", "<", "file access",
426
"file", "<", "fileaccess",
427
};
428
429
compareArray(c, tests);
430
}
431
432
// Collation not Unicode conformant with Hangul syllables
433
//
434
public void Test4114076() {
435
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
436
c.setStrength(Collator.TERTIARY);
437
438
//
439
// With Canonical decomposition, Hangul syllables should get decomposed
440
// into Jamo, but Jamo characters should not be decomposed into
441
// conjoining Jamo
442
//
443
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
444
String[] test1 = {
445
"\ud4db", "=", "\u1111\u1171\u11b6",
446
};
447
compareArray(c, test1);
448
449
// Full decomposition result should be the same as canonical decomposition
450
// for all hangul.
451
c.setDecomposition(Collator.FULL_DECOMPOSITION);
452
compareArray(c, test1);
453
454
}
455
456
457
// Collator.getCollationKey was hanging on certain character sequences
458
//
459
public void Test4124632() throws Exception {
460
Collator coll = Collator.getInstance(Locale.JAPAN);
461
462
try {
463
coll.getCollationKey("A\u0308bc");
464
} catch (OutOfMemoryError e) {
465
errln("Ran out of memory -- probably an infinite loop");
466
}
467
}
468
469
// sort order of french words with multiple accents has errors
470
//
471
public void Test4132736() {
472
Collator c = Collator.getInstance(Locale.FRANCE);
473
474
String[] test1 = {
475
"e\u0300e\u0301", "<", "e\u0301e\u0300",
476
"e\u0300\u0301", ">", "e\u0301\u0300",
477
};
478
compareArray(c, test1);
479
}
480
481
// The sorting using java.text.CollationKey is not in the exact order
482
//
483
public void Test4133509() {
484
String[] test1 = {
485
"Exception", "<", "ExceptionInInitializerError",
486
"Graphics", "<", "GraphicsEnvironment",
487
"String", "<", "StringBuffer",
488
};
489
compareArray(en_us, test1);
490
}
491
492
// Collation with decomposition off doesn't work for Europe
493
//
494
public void Test4114077() {
495
// Ensure that we get the same results with decomposition off
496
// as we do with it on....
497
498
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
499
c.setStrength(Collator.TERTIARY);
500
501
String[] test1 = {
502
"\u00C0", "=", "A\u0300", // Should be equivalent
503
"p\u00eache", ">", "p\u00e9ch\u00e9",
504
"\u0204", "=", "E\u030F",
505
"\u01fa", "=", "A\u030a\u0301", // a-ring-acute -> a-ring, acute
506
// -> a, ring, acute
507
"A\u0300\u0316", "<", "A\u0316\u0300", // No reordering --> unequal
508
};
509
c.setDecomposition(Collator.NO_DECOMPOSITION);
510
compareArray(c, test1);
511
512
String[] test2 = {
513
"A\u0300\u0316", "=", "A\u0316\u0300", // Reordering --> equal
514
};
515
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
516
compareArray(c, test2);
517
}
518
519
// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
520
//
521
public void Test4141640() {
522
//
523
// Rather than just creating a Swedish collator, we might as well
524
// try to instantiate one for every locale available on the system
525
// in order to prevent this sort of bug from cropping up in the future
526
//
527
Locale[] locales = Collator.getAvailableLocales();
528
529
for (int i = 0; i < locales.length; i++) {
530
try {
531
Collator c = Collator.getInstance(locales[i]);
532
} catch (Exception e) {
533
errln("Caught " + e + " creating collator for " + locales[i]);
534
}
535
}
536
}
537
538
// getCollationKey throws exception for spanish text
539
// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
540
//
541
public void Test4139572() {
542
//
543
// Code pasted straight from the bug report
544
//
545
// create spanish locale and collator
546
Locale l = new Locale("es", "es");
547
Collator col = Collator.getInstance(l);
548
549
// this spanish phrase kills it!
550
col.getCollationKey("Nombre De Objeto");
551
}
552
553
// RuleBasedCollator doesn't use getCollationElementIterator internally
554
//
555
public void Test4146160() throws ParseException {
556
//
557
// Use a custom collator class whose getCollationElementIterator
558
// methods increment a count....
559
//
560
My4146160Collator.count = 0;
561
new My4146160Collator().getCollationKey("1");
562
if (My4146160Collator.count < 1) {
563
errln("getCollationElementIterator not called");
564
}
565
566
My4146160Collator.count = 0;
567
new My4146160Collator().compare("1", "2");
568
if (My4146160Collator.count < 1) {
569
errln("getCollationElementIterator not called");
570
}
571
}
572
573
static class My4146160Collator extends RuleBasedCollator {
574
public My4146160Collator() throws ParseException {
575
super(Regression.en_us.getRules());
576
}
577
578
public CollationElementIterator getCollationElementIterator(
579
String text) {
580
count++;
581
return super.getCollationElementIterator(text);
582
}
583
public CollationElementIterator getCollationElementIterator(
584
CharacterIterator text) {
585
count++;
586
return super.getCollationElementIterator(text);
587
}
588
589
public static int count = 0;
590
};
591
592
// CollationElementIterator.previous broken for expanding char sequences
593
//
594
public void Test4179686() throws ParseException {
595
596
// Create a collator with a few expanding character sequences in it....
597
RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
598
+ " & ae ; \u00e4 & AE ; \u00c4"
599
+ " & oe ; \u00f6 & OE ; \u00d6"
600
+ " & ue ; \u00fc & UE ; \u00dc");
601
602
String text = "T\u00f6ne"; // o-umlaut
603
604
CollationElementIterator iter = coll.getCollationElementIterator(text);
605
Vector elements = new Vector();
606
int elem;
607
608
// Iterate forward and collect all of the elements into a Vector
609
while ((elem = iter.next()) != iter.NULLORDER) {
610
elements.addElement(new Integer(elem));
611
}
612
613
// Now iterate backward and make sure they're the same
614
int index = elements.size() - 1;
615
while ((elem = iter.previous()) != iter.NULLORDER) {
616
int expect = ((Integer)elements.elementAt(index)).intValue();
617
618
if (elem != expect) {
619
errln("Mismatch at index " + index
620
+ ": got " + Integer.toString(elem,16)
621
+ ", expected " + Integer.toString(expect,16));
622
}
623
index--;
624
}
625
}
626
627
public void Test4244884() throws ParseException {
628
RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
629
coll = new RuleBasedCollator(coll.getRules()
630
+ " & C < ch , cH , Ch , CH < cat < crunchy");
631
632
String[] testStrings = new String[] {
633
"car",
634
"cave",
635
"clamp",
636
"cramp",
637
"czar",
638
"church",
639
"catalogue",
640
"crunchy",
641
"dog"
642
};
643
644
for (int i = 1; i < testStrings.length; i++) {
645
if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
646
errln("error: \"" + testStrings[i - 1]
647
+ "\" is greater than or equal to \"" + testStrings[i]
648
+ "\".");
649
}
650
}
651
}
652
653
public void Test4179216() throws ParseException {
654
// you can position a CollationElementIterator in the middle of
655
// a contracting character sequence, yielding a bogus collation
656
// element
657
RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
658
coll = new RuleBasedCollator(coll.getRules()
659
+ " & C < ch , cH , Ch , CH < cat < crunchy");
660
String testText = "church church catcatcher runcrunchynchy";
661
CollationElementIterator iter = coll.getCollationElementIterator(
662
testText);
663
664
// test that the "ch" combination works properly
665
iter.setOffset(4);
666
int elt4 = CollationElementIterator.primaryOrder(iter.next());
667
668
iter.reset();
669
int elt0 = CollationElementIterator.primaryOrder(iter.next());
670
671
iter.setOffset(5);
672
int elt5 = CollationElementIterator.primaryOrder(iter.next());
673
674
if (elt4 != elt0 || elt5 != elt0)
675
errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
676
+ elt4 + "), and 5 (" + elt5 + ") don't match.");
677
678
// test that the "cat" combination works properly
679
iter.setOffset(14);
680
int elt14 = CollationElementIterator.primaryOrder(iter.next());
681
682
iter.setOffset(15);
683
int elt15 = CollationElementIterator.primaryOrder(iter.next());
684
685
iter.setOffset(16);
686
int elt16 = CollationElementIterator.primaryOrder(iter.next());
687
688
iter.setOffset(17);
689
int elt17 = CollationElementIterator.primaryOrder(iter.next());
690
691
iter.setOffset(18);
692
int elt18 = CollationElementIterator.primaryOrder(iter.next());
693
694
iter.setOffset(19);
695
int elt19 = CollationElementIterator.primaryOrder(iter.next());
696
697
if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
698
|| elt14 != elt18 || elt14 != elt19)
699
errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
700
+ elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
701
+ ", elt18 = " + elt18 + ", elt19 = " + elt19);
702
703
// now generate a complete list of the collation elements,
704
// first using next() and then using setOffset(), and
705
// make sure both interfaces return the same set of elements
706
iter.reset();
707
708
int elt = iter.next();
709
int count = 0;
710
while (elt != CollationElementIterator.NULLORDER) {
711
++count;
712
elt = iter.next();
713
}
714
715
String[] nextElements = new String[count];
716
String[] setOffsetElements = new String[count];
717
int lastPos = 0;
718
719
iter.reset();
720
elt = iter.next();
721
count = 0;
722
while (elt != CollationElementIterator.NULLORDER) {
723
nextElements[count++] = testText.substring(lastPos, iter.getOffset());
724
lastPos = iter.getOffset();
725
elt = iter.next();
726
}
727
count = 0;
728
for (int i = 0; i < testText.length(); ) {
729
iter.setOffset(i);
730
lastPos = iter.getOffset();
731
elt = iter.next();
732
setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
733
i = iter.getOffset();
734
}
735
for (int i = 0; i < nextElements.length; i++) {
736
if (nextElements[i].equals(setOffsetElements[i])) {
737
logln(nextElements[i]);
738
} else {
739
errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
740
+ setOffsetElements[i]);
741
}
742
}
743
}
744
745
public void Test4216006() throws Exception {
746
// rule parser barfs on "<\u00e0=a\u0300", and on other cases
747
// where the same token (after normalization) appears twice in a row
748
boolean caughtException = false;
749
try {
750
RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
751
}
752
catch (ParseException e) {
753
caughtException = true;
754
}
755
if (!caughtException) {
756
throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
757
}
758
759
RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
760
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
761
collator.setStrength(Collator.IDENTICAL);
762
763
String[] tests = {
764
"a\u0300", "=", "\u00e0",
765
"\u00e0", "=", "a\u0300"
766
};
767
768
compareArray(collator, tests);
769
}
770
771
public void Test4171974() {
772
// test French accent ordering more thoroughly
773
String[] frenchList = {
774
"\u0075\u0075", // u u
775
"\u00fc\u0075", // u-umlaut u
776
"\u01d6\u0075", // u-umlaut-macron u
777
"\u016b\u0075", // u-macron u
778
"\u1e7b\u0075", // u-macron-umlaut u
779
"\u0075\u00fc", // u u-umlaut
780
"\u00fc\u00fc", // u-umlaut u-umlaut
781
"\u01d6\u00fc", // u-umlaut-macron u-umlaut
782
"\u016b\u00fc", // u-macron u-umlaut
783
"\u1e7b\u00fc", // u-macron-umlaut u-umlaut
784
"\u0075\u01d6", // u u-umlaut-macron
785
"\u00fc\u01d6", // u-umlaut u-umlaut-macron
786
"\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
787
"\u016b\u01d6", // u-macron u-umlaut-macron
788
"\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
789
"\u0075\u016b", // u u-macron
790
"\u00fc\u016b", // u-umlaut u-macron
791
"\u01d6\u016b", // u-umlaut-macron u-macron
792
"\u016b\u016b", // u-macron u-macron
793
"\u1e7b\u016b", // u-macron-umlaut u-macron
794
"\u0075\u1e7b", // u u-macron-umlaut
795
"\u00fc\u1e7b", // u-umlaut u-macron-umlaut
796
"\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
797
"\u016b\u1e7b", // u-macron u-macron-umlaut
798
"\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
799
};
800
Collator french = Collator.getInstance(Locale.FRENCH);
801
802
logln("Testing French order...");
803
checkListOrder(frenchList, french);
804
805
logln("Testing French order without decomposition...");
806
french.setDecomposition(Collator.NO_DECOMPOSITION);
807
checkListOrder(frenchList, french);
808
809
String[] englishList = {
810
"\u0075\u0075", // u u
811
"\u0075\u00fc", // u u-umlaut
812
"\u0075\u01d6", // u u-umlaut-macron
813
"\u0075\u016b", // u u-macron
814
"\u0075\u1e7b", // u u-macron-umlaut
815
"\u00fc\u0075", // u-umlaut u
816
"\u00fc\u00fc", // u-umlaut u-umlaut
817
"\u00fc\u01d6", // u-umlaut u-umlaut-macron
818
"\u00fc\u016b", // u-umlaut u-macron
819
"\u00fc\u1e7b", // u-umlaut u-macron-umlaut
820
"\u01d6\u0075", // u-umlaut-macron u
821
"\u01d6\u00fc", // u-umlaut-macron u-umlaut
822
"\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
823
"\u01d6\u016b", // u-umlaut-macron u-macron
824
"\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
825
"\u016b\u0075", // u-macron u
826
"\u016b\u00fc", // u-macron u-umlaut
827
"\u016b\u01d6", // u-macron u-umlaut-macron
828
"\u016b\u016b", // u-macron u-macron
829
"\u016b\u1e7b", // u-macron u-macron-umlaut
830
"\u1e7b\u0075", // u-macron-umlaut u
831
"\u1e7b\u00fc", // u-macron-umlaut u-umlaut
832
"\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
833
"\u1e7b\u016b", // u-macron-umlaut u-macron
834
"\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
835
};
836
Collator english = Collator.getInstance(Locale.ENGLISH);
837
838
logln("Testing English order...");
839
checkListOrder(englishList, english);
840
841
logln("Testing English order without decomposition...");
842
english.setDecomposition(Collator.NO_DECOMPOSITION);
843
checkListOrder(englishList, english);
844
}
845
846
private void checkListOrder(String[] sortedList, Collator c) {
847
// this function uses the specified Collator to make sure the
848
// passed-in list is already sorted into ascending order
849
for (int i = 0; i < sortedList.length - 1; i++) {
850
if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
851
errln("List out of order at element #" + i + ": "
852
+ prettify(sortedList[i]) + " >= "
853
+ prettify(sortedList[i + 1]));
854
}
855
}
856
}
857
858
// CollationElementIterator set doesn't work propertly with next/prev
859
public void Test4663220() {
860
RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
861
CharacterIterator stringIter = new StringCharacterIterator("fox");
862
CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
863
864
int[] elements_next = new int[3];
865
logln("calling next:");
866
for (int i = 0; i < 3; ++i) {
867
logln("[" + i + "] " + (elements_next[i] = iter.next()));
868
}
869
870
int[] elements_fwd = new int[3];
871
logln("calling set/next:");
872
for (int i = 0; i < 3; ++i) {
873
iter.setOffset(i);
874
logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
875
}
876
877
for (int i = 0; i < 3; ++i) {
878
if (elements_next[i] != elements_fwd[i]) {
879
errln("mismatch at position " + i +
880
": " + elements_next[i] +
881
" != " + elements_fwd[i]);
882
}
883
}
884
}
885
886
//------------------------------------------------------------------------
// Internal utilities
//
889
private void compareArray(Collator c, String[] tests) {
890
for (int i = 0; i < tests.length; i += 3) {
891
892
int expect = 0;
893
if (tests[i+1].equals("<")) {
894
expect = -1;
895
} else if (tests[i+1].equals(">")) {
896
expect = 1;
897
} else if (tests[i+1].equals("=")) {
898
expect = 0;
899
} else {
900
expect = Integer.decode(tests[i+1]).intValue();
901
}
902
903
int result = c.compare(tests[i], tests[i+2]);
904
if (sign(result) != sign(expect))
905
{
906
errln( i/3 + ": compare(" + prettify(tests[i])
907
+ " , " + prettify(tests[i+2])
908
+ ") got " + result + "; expected " + expect);
909
}
910
else
911
{
912
// Collator.compare worked OK; now try the collation keys
913
CollationKey k1 = c.getCollationKey(tests[i]);
914
CollationKey k2 = c.getCollationKey(tests[i+2]);
915
916
result = k1.compareTo(k2);
917
if (sign(result) != sign(expect)) {
918
errln( i/3 + ": key(" + prettify(tests[i])
919
+ ").compareTo(key(" + prettify(tests[i+2])
920
+ ")) got " + result + "; expected " + expect);
921
922
errln(" " + prettify(k1) + " vs. " + prettify(k2));
923
}
924
}
925
}
926
}
927
928
private static final int sign(int i) {
929
if (i < 0) return -1;
930
if (i > 0) return 1;
931
return 0;
932
}
933
934
935
static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
936
937
String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
938
String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
939
String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
940
}
941
942