// Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/test/java/text/BreakIterator/BreakIteratorTest.java
// 38813 views
/*1* Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*/2223/*24* @test25* @bug 4035266 4052418 4068133 4068137 4068139 4086052 4095322 409777926* 4097920 4098467 4111338 4113835 4117554 4143071 4146175 415211727* 4152416 4153072 4158381 4214367 4217703 463843328* @library /java/text/testlib29* @run main/timeout=2000 BreakIteratorTest30* @summary test BreakIterator31*/3233/*34*35*36* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved37* (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved38*39* Portions copyright (c) 2007 Sun Microsystems, Inc.40* All Rights Reserved.41*42* The original version of this source code and documentation43* is copyrighted and owned by Taligent, Inc., a wholly-owned44* subsidiary of IBM. These materials are provided under terms45* of a License Agreement between Taligent and Sun. 
This technology46* is protected by multiple US and International patents.47*48* This notice and attribution to Taligent may not be removed.49* Taligent is a registered trademark of Taligent, Inc.50*51* Permission to use, copy, modify, and distribute this software52* and its documentation for NON-COMMERCIAL purposes and without53* fee is hereby granted provided that this copyright notice54* appears in all copies. Please refer to the file "copyright.html"55* for further important copyright and licensing information.56*57* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF58* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED59* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A60* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR61* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR62* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.63*64*/6566import java.text.BreakIterator;67import java.text.CharacterIterator;68import java.text.StringCharacterIterator;69import java.util.Locale;70import java.util.Vector;71import java.util.Enumeration;72import java.io.*;7374public class BreakIteratorTest extends IntlTest75{76private BreakIterator characterBreak;77private BreakIterator wordBreak;78private BreakIterator lineBreak;79private BreakIterator sentenceBreak;8081public static void main(String[] args) throws Exception {82new BreakIteratorTest().run(args);83}8485public BreakIteratorTest()86{87characterBreak = BreakIterator.getCharacterInstance();88wordBreak = BreakIterator.getWordInstance();89lineBreak = BreakIterator.getLineInstance();90sentenceBreak = BreakIterator.getSentenceInstance();91}9293//=========================================================================94// general test subroutines95//=========================================================================9697private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {98StringBuffer buffer = new StringBuffer();99String 
text;100for (int i = 0; i < expectedResult.size(); i++) {101text = (String)expectedResult.elementAt(i);102buffer.append(text);103}104text = buffer.toString();105106bi.setText(text);107108Vector nextResults = testFirstAndNext(bi, text);109Vector previousResults = testLastAndPrevious(bi, text);110111logln("comparing forward and backward...");112int errs = getErrorCount();113compareFragmentLists("forward iteration", "backward iteration", nextResults,114previousResults);115if (getErrorCount() == errs) {116logln("comparing expected and actual...");117compareFragmentLists("expected result", "actual result", expectedResult,118nextResults);119}120121int[] boundaries = new int[expectedResult.size() + 3];122boundaries[0] = BreakIterator.DONE;123boundaries[1] = 0;124for (int i = 0; i < expectedResult.size(); i++)125boundaries[i + 2] = boundaries[i + 1] + ((String)expectedResult.elementAt(i)).126length();127boundaries[boundaries.length - 1] = BreakIterator.DONE;128129testFollowing(bi, text, boundaries);130testPreceding(bi, text, boundaries);131testIsBoundary(bi, text, boundaries);132133doMultipleSelectionTest(bi, text);134}135136private Vector testFirstAndNext(BreakIterator bi, String text) {137int p = bi.first();138int lastP = p;139Vector<String> result = new Vector<String>();140141if (p != 0)142errln("first() returned " + p + " instead of 0");143while (p != BreakIterator.DONE) {144p = bi.next();145if (p != BreakIterator.DONE) {146if (p <= lastP)147errln("next() failed to move forward: next() on position "148+ lastP + " yielded " + p);149150result.addElement(text.substring(lastP, p));151}152else {153if (lastP != text.length())154errln("next() returned DONE prematurely: offset was "155+ lastP + " instead of " + text.length());156}157lastP = p;158}159return result;160}161162private Vector testLastAndPrevious(BreakIterator bi, String text) {163int p = bi.last();164int lastP = p;165Vector<String> result = new Vector<String>();166167if (p != text.length())168errln("last() returned 
" + p + " instead of " + text.length());169while (p != BreakIterator.DONE) {170p = bi.previous();171if (p != BreakIterator.DONE) {172if (p >= lastP)173errln("previous() failed to move backward: previous() on position "174+ lastP + " yielded " + p);175176result.insertElementAt(text.substring(p, lastP), 0);177}178else {179if (lastP != 0)180errln("previous() returned DONE prematurely: offset was "181+ lastP + " instead of 0");182}183lastP = p;184}185return result;186}187188private void compareFragmentLists(String f1Name, String f2Name, Vector f1, Vector f2) {189int p1 = 0;190int p2 = 0;191String s1;192String s2;193int t1 = 0;194int t2 = 0;195196while (p1 < f1.size() && p2 < f2.size()) {197s1 = (String)f1.elementAt(p1);198s2 = (String)f2.elementAt(p2);199t1 += s1.length();200t2 += s2.length();201202if (s1.equals(s2)) {203debugLogln(" >" + s1 + "<");204++p1;205++p2;206}207else {208int tempT1 = t1;209int tempT2 = t2;210int tempP1 = p1;211int tempP2 = p2;212213while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {214while (tempT1 < tempT2 && tempP1 < f1.size()) {215tempT1 += ((String)f1.elementAt(tempP1)).length();216++tempP1;217}218while (tempT2 < tempT1 && tempP2 < f2.size()) {219tempT2 += ((String)f2.elementAt(tempP2)).length();220++tempP2;221}222}223logln("*** " + f1Name + " has:");224while (p1 <= tempP1 && p1 < f1.size()) {225s1 = (String)f1.elementAt(p1);226t1 += s1.length();227debugLogln(" *** >" + s1 + "<");228++p1;229}230logln("***** " + f2Name + " has:");231while (p2 <= tempP2 && p2 < f2.size()) {232s2 = (String)f2.elementAt(p2);233t2 += s2.length();234debugLogln(" ***** >" + s2 + "<");235++p2;236}237errln("Discrepancy between " + f1Name + " and " + f2Name + "\n---\n" + f1 +"\n---\n" + f2);238}239}240}241242private void testFollowing(BreakIterator bi, String text, int[] boundaries) {243logln("testFollowing():");244int p = 2;245int i = 0;246try {247for (i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in248if (i == 
boundaries[p])249++p;250251int b = bi.following(i);252logln("bi.following(" + i + ") -> " + b);253if (b != boundaries[p])254errln("Wrong result from following() for " + i + ": expected " + boundaries[p]255+ ", got " + b);256}257} catch (IllegalArgumentException illargExp) {258errln("IllegalArgumentException caught from following() for offset: " + i);259}260}261262private void testPreceding(BreakIterator bi, String text, int[] boundaries) {263logln("testPreceding():");264int p = 0;265int i = 0;266try {267for (i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in268int b = bi.preceding(i);269logln("bi.preceding(" + i + ") -> " + b);270if (b != boundaries[p])271errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]272+ ", got " + b);273274if (i == boundaries[p + 1])275++p;276}277} catch (IllegalArgumentException illargExp) {278errln("IllegalArgumentException caught from preceding() for offset: " + i);279}280}281282private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {283logln("testIsBoundary():");284int p = 1;285boolean isB;286for (int i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in287isB = bi.isBoundary(i);288logln("bi.isBoundary(" + i + ") -> " + isB);289290if (i == boundaries[p]) {291if (!isB)292errln("Wrong result from isBoundary() for " + i + ": expected true, got false");293++p;294}295else {296if (isB)297errln("Wrong result from isBoundary() for " + i + ": expected false, got true");298}299}300}301302private void doMultipleSelectionTest(BreakIterator iterator, String testText)303{304logln("Multiple selection test...");305BreakIterator testIterator = (BreakIterator)iterator.clone();306int offset = iterator.first();307int testOffset;308int count = 0;309310do {311testOffset = testIterator.first();312testOffset = testIterator.next(count);313logln("next(" + count + ") -> " + testOffset);314if (offset != testOffset)315errln("next(n) and next() not returning consistent 
results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);316317if (offset != BreakIterator.DONE) {318count++;319offset = iterator.next();320}321} while (offset != BreakIterator.DONE);322323// now do it backwards...324offset = iterator.last();325count = 0;326327do {328testOffset = testIterator.last();329testOffset = testIterator.next(count);330logln("next(" + count + ") -> " + testOffset);331if (offset != testOffset)332errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);333334if (offset != BreakIterator.DONE) {335count--;336offset = iterator.previous();337}338} while (offset != BreakIterator.DONE);339}340341private void doBreakInvariantTest(BreakIterator tb, String testChars)342{343StringBuffer work = new StringBuffer("aaa");344int errorCount = 0;345346// a break should always occur after CR (unless followed by LF), LF, PS, and LS347String breaks = /*"\r\n\u2029\u2028"*/"\n\u2029\u2028";348// change this back when new BI code is added349350for (int i = 0; i < breaks.length(); i++) {351work.setCharAt(1, breaks.charAt(i));352for (int j = 0; j < testChars.length(); j++) {353work.setCharAt(0, testChars.charAt(j));354for (int k = 0; k < testChars.length(); k++) {355char c = testChars.charAt(k);356357// if a cr is followed by lf, don't do the check (they stay together)358if (work.charAt(1) == '\r' && (c == '\n'))359continue;360361// CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored362// for breaking purposes as per UTR14363int type1 = Character.getType(work.charAt(1));364int type2 = Character.getType(c);365if (type1 == Character.CONTROL || type1 == Character.FORMAT ||366type2 == Character.CONTROL || type2 == Character.FORMAT) {367continue;368}369370work.setCharAt(2, c);371tb.setText(work.toString());372boolean seen2 = false;373for (int l = tb.first(); l != BreakIterator.DONE; l = tb.next()) {374if (l == 2)375seen2 = true;376}377if 
(!seen2) {378errln("No break between U+" + Integer.toHexString((int)(work.charAt(1)))379+ " and U+" + Integer.toHexString((int)(work.charAt(2))));380errorCount++;381if (errorCount >= 75)382return;383}384}385}386}387}388389private void doOtherInvariantTest(BreakIterator tb, String testChars)390{391StringBuffer work = new StringBuffer("a\r\na");392int errorCount = 0;393394// a break should never occur between CR and LF395for (int i = 0; i < testChars.length(); i++) {396work.setCharAt(0, testChars.charAt(i));397for (int j = 0; j < testChars.length(); j++) {398work.setCharAt(3, testChars.charAt(j));399tb.setText(work.toString());400for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())401if (k == 2) {402errln("Break between CR and LF in string U+" + Integer.toHexString(403(int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(404(int)(work.charAt(3))));405errorCount++;406if (errorCount >= 75)407return;408}409}410}411412// a break should never occur before a non-spacing mark, unless it's preceded413// by a line terminator414work.setLength(0);415work.append("aaaa");416for (int i = 0; i < testChars.length(); i++) {417char c = testChars.charAt(i);418if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')419continue;420work.setCharAt(1, c);421for (int j = 0; j < testChars.length(); j++) {422c = testChars.charAt(j);423if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)424!= Character.ENCLOSING_MARK)425continue;426work.setCharAt(2, c);427428// CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored429// for breaking purposes as per UTR14430int type1 = Character.getType(work.charAt(1));431int type2 = Character.getType(work.charAt(2));432if (type1 == Character.CONTROL || type1 == Character.FORMAT ||433type2 == Character.CONTROL || type2 == Character.FORMAT) {434continue;435}436437tb.setText(work.toString());438for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())439if (k == 2) {440errln("Break 
between U+" + Integer.toHexString((int)(work.charAt(1)))441+ " and U+" + Integer.toHexString((int)(work.charAt(2))));442errorCount++;443if (errorCount >= 75)444return;445}446}447}448}449450public void debugLogln(String s) {451final String zeros = "0000";452String temp;453StringBuffer out = new StringBuffer();454for (int i = 0; i < s.length(); i++) {455char c = s.charAt(i);456if (c >= ' ' && c < '\u007f')457out.append(c);458else {459out.append("\\u");460temp = Integer.toHexString((int)c);461out.append(zeros.substring(0, 4 - temp.length()));462out.append(temp);463}464}465logln(out.toString());466}467468//=========================================================================469// tests470//=========================================================================471472public void TestWordBreak() {473474Vector<String> wordSelectionData = new Vector<String>();475476wordSelectionData.addElement("12,34");477478wordSelectionData.addElement(" ");479wordSelectionData.addElement("\u00A2"); //cent sign480wordSelectionData.addElement("\u00A3"); //pound sign481wordSelectionData.addElement("\u00A4"); //currency sign482wordSelectionData.addElement("\u00A5"); //yen sign483wordSelectionData.addElement("alpha-beta-gamma");484wordSelectionData.addElement(".");485wordSelectionData.addElement(" ");486wordSelectionData.addElement("Badges");487wordSelectionData.addElement("?");488wordSelectionData.addElement(" ");489wordSelectionData.addElement("BADGES");490wordSelectionData.addElement("!");491wordSelectionData.addElement("?");492wordSelectionData.addElement("!");493wordSelectionData.addElement(" ");494wordSelectionData.addElement("We");495wordSelectionData.addElement(" ");496wordSelectionData.addElement("don't");497wordSelectionData.addElement(" ");498wordSelectionData.addElement("need");499wordSelectionData.addElement(" ");500wordSelectionData.addElement("no");501wordSelectionData.addElement(" ");502wordSelectionData.addElement("STINKING");503wordSelectionData.addElement(" 
");504wordSelectionData.addElement("BADGES");505wordSelectionData.addElement("!");506wordSelectionData.addElement("!");507wordSelectionData.addElement("!");508509wordSelectionData.addElement("012.566,5");510wordSelectionData.addElement(" ");511wordSelectionData.addElement("123.3434,900");512wordSelectionData.addElement(" ");513wordSelectionData.addElement("1000,233,456.000");514wordSelectionData.addElement(" ");515wordSelectionData.addElement("1,23.322%");516wordSelectionData.addElement(" ");517wordSelectionData.addElement("123.1222");518519wordSelectionData.addElement(" ");520wordSelectionData.addElement("\u0024123,000.20");521522wordSelectionData.addElement(" ");523wordSelectionData.addElement("179.01\u0025");524525wordSelectionData.addElement("Hello");526wordSelectionData.addElement(",");527wordSelectionData.addElement(" ");528wordSelectionData.addElement("how");529wordSelectionData.addElement(" ");530wordSelectionData.addElement("are");531wordSelectionData.addElement(" ");532wordSelectionData.addElement("you");533wordSelectionData.addElement(" ");534wordSelectionData.addElement("X");535wordSelectionData.addElement(" ");536537wordSelectionData.addElement("Now");538wordSelectionData.addElement("\r");539wordSelectionData.addElement("is");540wordSelectionData.addElement("\n");541wordSelectionData.addElement("the");542wordSelectionData.addElement("\r\n");543wordSelectionData.addElement("time");544wordSelectionData.addElement("\n");545wordSelectionData.addElement("\r");546wordSelectionData.addElement("for");547wordSelectionData.addElement("\r");548wordSelectionData.addElement("\r");549wordSelectionData.addElement("all");550wordSelectionData.addElement(" ");551552generalIteratorTest(wordBreak, wordSelectionData);553}554555public void TestBug4097779() {556Vector<String> wordSelectionData = new Vector<String>();557558wordSelectionData.addElement("aa\u0300a");559wordSelectionData.addElement(" ");560561generalIteratorTest(wordBreak, wordSelectionData);562}563564public 
void TestBug4098467Words() {565Vector<String> wordSelectionData = new Vector<String>();566567// What follows is a string of Korean characters (I found it in the Yellow Pages568// ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed569// it correctly), first as precomposed syllables, and then as conjoining jamo.570// Both sequences should be semantically identical and break the same way.571// precomposed syllables...572wordSelectionData.addElement("\uc0c1\ud56d");573wordSelectionData.addElement(" ");574wordSelectionData.addElement("\ud55c\uc778");575wordSelectionData.addElement(" ");576wordSelectionData.addElement("\uc5f0\ud569");577wordSelectionData.addElement(" ");578wordSelectionData.addElement("\uc7a5\ub85c\uad50\ud68c");579wordSelectionData.addElement(" ");580// conjoining jamo...581wordSelectionData.addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc");582wordSelectionData.addElement(" ");583wordSelectionData.addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab");584wordSelectionData.addElement(" ");585wordSelectionData.addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8");586wordSelectionData.addElement(" ");587wordSelectionData.addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");588wordSelectionData.addElement(" ");589590generalIteratorTest(wordBreak, wordSelectionData);591}592593public void TestBug4117554Words() {594Vector<String> wordSelectionData = new Vector<String>();595596// this is a test for bug #4117554: the ideographic iteration mark (U+3005) should597// count as a Kanji character for the purposes of word breaking598wordSelectionData.addElement("abc");599wordSelectionData.addElement("\u4e01\u4e02\u3005\u4e03\u4e03");600wordSelectionData.addElement("abc");601602generalIteratorTest(wordBreak, wordSelectionData);603}604605public void TestSentenceBreak() {606Vector<String> sentenceSelectionData = new Vector<String>();607608sentenceSelectionData.addElement("This is a simple sample sentence. 
");609sentenceSelectionData.addElement("(This is it.) ");610sentenceSelectionData.addElement("This is a simple sample sentence. ");611sentenceSelectionData.addElement("\"This isn\'t it.\" ");612sentenceSelectionData.addElement("Hi! ");613sentenceSelectionData.addElement("This is a simple sample sentence. ");614sentenceSelectionData.addElement("It does not have to make any sense as you can see. ");615sentenceSelectionData.addElement("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");616sentenceSelectionData.addElement("Che la dritta via aveo smarrita. ");617sentenceSelectionData.addElement("He said, that I said, that you said!! ");618619sentenceSelectionData.addElement("Don't rock the boat.\u2029");620621sentenceSelectionData.addElement("Because I am the daddy, that is why. ");622sentenceSelectionData.addElement("Not on my time (el timo.)! ");623624sentenceSelectionData.addElement("So what!!\u2029");625626sentenceSelectionData.addElement("\"But now,\" he said, \"I know!\" ");627sentenceSelectionData.addElement("Harris thumbed down several, including \"Away We Go\" (which became the huge success Oklahoma!). ");628sentenceSelectionData.addElement("One species, B. anthracis, is highly virulent.\n");629sentenceSelectionData.addElement("Wolf said about Sounder:\"Beautifully thought-out and directed.\" ");630sentenceSelectionData.addElement("Have you ever said, \"This is where \tI shall live\"? ");631sentenceSelectionData.addElement("He answered, \"You may not!\" ");632sentenceSelectionData.addElement("Another popular saying is: \"How do you do?\". ");633sentenceSelectionData.addElement("Yet another popular saying is: \'I\'m fine thanks.\' ");634sentenceSelectionData.addElement("What is the proper use of the abbreviation pp.? 
");635sentenceSelectionData.addElement("Yes, I am definatelly 12\" tall!!");636637generalIteratorTest(sentenceBreak, sentenceSelectionData);638}639640public void TestBug4113835() {641Vector<String> sentenceSelectionData = new Vector<String>();642643// test for bug #4113835: \n and \r count as spaces, not as paragraph breaks644sentenceSelectionData.addElement("Now\ris\nthe\r\ntime\n\rfor\r\rall\u2029");645646generalIteratorTest(sentenceBreak, sentenceSelectionData);647}648649public void TestBug4111338() {650Vector<String> sentenceSelectionData = new Vector<String>();651652// test for bug #4111338: Don't break sentences at the boundary between CJK653// and other letters654sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"655+ "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"656+ "\u611d\u57b6\u2510\u5d46\".\u2029");657sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"658+ "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"659+ "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");660sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"661+ "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"662+ "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");663sentenceSelectionData.addElement("He said, \"I can go there.\"\u2029");664665generalIteratorTest(sentenceBreak, sentenceSelectionData);666}667668public void TestBug4117554Sentences() {669Vector<String> sentenceSelectionData = new Vector<String>();670671// Treat fullwidth variants of .!? 
the same as their672// normal counterparts673sentenceSelectionData.addElement("I know I'm right\uff0e ");674sentenceSelectionData.addElement("Right\uff1f ");675sentenceSelectionData.addElement("Right\uff01 ");676677// Don't break sentences at boundary between CJK and digits678sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"679+ "\u97e48888\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"680+ "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");681682// Break sentence between a sentence terminator and683// opening punctuation684sentenceSelectionData.addElement("no?");685sentenceSelectionData.addElement("(yes)");686687generalIteratorTest(sentenceBreak, sentenceSelectionData);688}689690public void TestBug4158381() {691Vector<String> sentenceSelectionData = new Vector<String>();692693// Don't break sentence after period if it isn't followed by a space694sentenceSelectionData.addElement("Test <code>Flags.Flag</code> class. ");695sentenceSelectionData.addElement("Another test.\u2029");696697// No breaks when there are no terminators around698sentenceSelectionData.addElement("<P>Provides a set of "699+ ""lightweight" (all-java<FONT SIZE=\"-2\"><SUP>TM"700+ "</SUP></FONT> language) components that, "701+ "to the maximum degree possible, work the same on all platforms. ");702sentenceSelectionData.addElement("Another test.\u2029");703704generalIteratorTest(sentenceBreak, sentenceSelectionData);705}706707public void TestBug4143071() {708Vector<String> sentenceSelectionData = new Vector<String>();709710// Make sure sentences that end with digits work right711sentenceSelectionData.addElement("Today is the 27th of May, 1998. ");712sentenceSelectionData.addElement("Tomorrow with be 28 May 1998. 
");713sentenceSelectionData.addElement("The day after will be the 30th.\u2029");714715generalIteratorTest(sentenceBreak, sentenceSelectionData);716}717718public void TestBug4152416() {719Vector<String> sentenceSelectionData = new Vector<String>();720721// Make sure sentences ending with a capital letter are treated correctly722sentenceSelectionData.addElement("The type of all primitive "723+ "<code>boolean</code> values accessed in the target VM. ");724sentenceSelectionData.addElement("Calls to xxx will return an "725+ "implementor of this interface.\u2029");726727generalIteratorTest(sentenceBreak, sentenceSelectionData);728}729730public void TestBug4152117() {731Vector<String> sentenceSelectionData = new Vector<String>();732733// Make sure sentence breaking is handling punctuation correctly734// [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE735// IT DOESN'T CROP UP]736sentenceSelectionData.addElement("Constructs a randomly generated "737+ "BigInteger, uniformly distributed over the range <tt>0</tt> "738+ "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");739sentenceSelectionData.addElement("The uniformity of the distribution "740+ "assumes that a fair source of random bits is provided in "741+ "<tt>rnd</tt>. 
");742sentenceSelectionData.addElement("Note that this constructor always "743+ "constructs a non-negative BigInteger.\u2029");744745generalIteratorTest(sentenceBreak, sentenceSelectionData);746}747748public void TestLineBreak() {749Vector<String> lineSelectionData = new Vector<String>();750751lineSelectionData.addElement("Multi-");752lineSelectionData.addElement("Level ");753lineSelectionData.addElement("example ");754lineSelectionData.addElement("of ");755lineSelectionData.addElement("a ");756lineSelectionData.addElement("semi-");757lineSelectionData.addElement("idiotic ");758lineSelectionData.addElement("non-");759lineSelectionData.addElement("sensical ");760lineSelectionData.addElement("(non-");761lineSelectionData.addElement("important) ");762lineSelectionData.addElement("sentence. ");763764lineSelectionData.addElement("Hi ");765lineSelectionData.addElement("Hello ");766lineSelectionData.addElement("How\n");767lineSelectionData.addElement("are\r");768lineSelectionData.addElement("you\u2028");769lineSelectionData.addElement("fine.\t");770lineSelectionData.addElement("good. 
");771772lineSelectionData.addElement("Now\r");773lineSelectionData.addElement("is\n");774lineSelectionData.addElement("the\r\n");775lineSelectionData.addElement("time\n");776lineSelectionData.addElement("\r");777lineSelectionData.addElement("for\r");778lineSelectionData.addElement("\r");779lineSelectionData.addElement("all");780781generalIteratorTest(lineBreak, lineSelectionData);782}783784public void TestBug4068133() {785Vector<String> lineSelectionData = new Vector<String>();786787lineSelectionData.addElement("\u96f6");788lineSelectionData.addElement("\u4e00\u3002");789lineSelectionData.addElement("\u4e8c\u3001");790lineSelectionData.addElement("\u4e09\u3002\u3001");791lineSelectionData.addElement("\u56db\u3001\u3002\u3001");792lineSelectionData.addElement("\u4e94,");793lineSelectionData.addElement("\u516d.");794lineSelectionData.addElement("\u4e03.\u3001,\u3002");795lineSelectionData.addElement("\u516b");796797generalIteratorTest(lineBreak, lineSelectionData);798}799800public void TestBug4086052() {801Vector<String> lineSelectionData = new Vector<String>();802803lineSelectionData.addElement("foo\u00a0bar ");804// lineSelectionData.addElement("foo\ufeffbar");805806generalIteratorTest(lineBreak, lineSelectionData);807}808809public void TestBug4097920() {810Vector<String> lineSelectionData = new Vector<String>();811812lineSelectionData.addElement("dog,");813lineSelectionData.addElement("cat,");814lineSelectionData.addElement("mouse ");815lineSelectionData.addElement("(one)");816lineSelectionData.addElement("(two)\n");817818generalIteratorTest(lineBreak, lineSelectionData);819}820/*821public void TestBug4035266() {822Vector<String> lineSelectionData = new Vector<String>();823824lineSelectionData.addElement("The ");825lineSelectionData.addElement("balance ");826lineSelectionData.addElement("is ");827lineSelectionData.addElement("$-23,456.78, ");828lineSelectionData.addElement("not 
");829lineSelectionData.addElement("-$32,456.78!\n");830831generalIteratorTest(lineBreak, lineSelectionData);832}833*/834public void TestBug4098467Lines() {835Vector<String> lineSelectionData = new Vector<String>();836837// What follows is a string of Korean characters (I found it in the Yellow Pages838// ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed839// it correctly), first as precomposed syllables, and then as conjoining jamo.840// Both sequences should be semantically identical and break the same way.841// precomposed syllables...842lineSelectionData.addElement("\uc0c1");843lineSelectionData.addElement("\ud56d ");844lineSelectionData.addElement("\ud55c");845lineSelectionData.addElement("\uc778 ");846lineSelectionData.addElement("\uc5f0");847lineSelectionData.addElement("\ud569 ");848lineSelectionData.addElement("\uc7a5");849lineSelectionData.addElement("\ub85c");850lineSelectionData.addElement("\uad50");851lineSelectionData.addElement("\ud68c ");852// conjoining jamo...853lineSelectionData.addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc ");854lineSelectionData.addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab ");855lineSelectionData.addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8 ");856lineSelectionData.addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");857858if (Locale.getDefault().getLanguage().equals("th")) {859logln("This test is skipped in th locale.");860return;861}862863generalIteratorTest(lineBreak, lineSelectionData);864}865866public void TestBug4117554Lines() {867Vector<String> lineSelectionData = new Vector<String>();868869// Fullwidth .!? 
should be treated as postJwrd870lineSelectionData.addElement("\u4e01\uff0e");871lineSelectionData.addElement("\u4e02\uff01");872lineSelectionData.addElement("\u4e03\uff1f");873874generalIteratorTest(lineBreak, lineSelectionData);875}876877public void TestBug4217703() {878if (Locale.getDefault().getLanguage().equals("th")) {879logln("This test is skipped in th locale.");880return;881}882883Vector<String> lineSelectionData = new Vector<String>();884885// There shouldn't be a line break between sentence-ending punctuation886// and a closing quote887lineSelectionData.addElement("He ");888lineSelectionData.addElement("said ");889lineSelectionData.addElement("\"Go!\" ");890lineSelectionData.addElement("I ");891lineSelectionData.addElement("went. ");892893lineSelectionData.addElement("Hashtable$Enumeration ");894lineSelectionData.addElement("getText().");895lineSelectionData.addElement("getIndex()");896897generalIteratorTest(lineBreak, lineSelectionData);898}899900private static final String graveS = "S\u0300";901private static final String acuteBelowI = "i\u0317";902private static final String acuteE = "e\u0301";903private static final String circumflexA = "a\u0302";904private static final String tildeE = "e\u0303";905906public void TestCharacterBreak() {907Vector<String> characterSelectionData = new Vector<String>();908909characterSelectionData.addElement(graveS);910characterSelectionData.addElement(acuteBelowI);911characterSelectionData.addElement("m");912characterSelectionData.addElement("p");913characterSelectionData.addElement("l");914characterSelectionData.addElement(acuteE);915characterSelectionData.addElement(" 
");916characterSelectionData.addElement("s");917characterSelectionData.addElement(circumflexA);918characterSelectionData.addElement("m");919characterSelectionData.addElement("p");920characterSelectionData.addElement("l");921characterSelectionData.addElement(tildeE);922characterSelectionData.addElement(".");923characterSelectionData.addElement("w");924characterSelectionData.addElement(circumflexA);925characterSelectionData.addElement("w");926characterSelectionData.addElement("a");927characterSelectionData.addElement("f");928characterSelectionData.addElement("q");929characterSelectionData.addElement("\n");930characterSelectionData.addElement("\r");931characterSelectionData.addElement("\r\n");932characterSelectionData.addElement("\n");933934generalIteratorTest(characterBreak, characterSelectionData);935}936937public void TestBug4098467Characters() {938Vector<String> characterSelectionData = new Vector<String>();939940// What follows is a string of Korean characters (I found it in the Yellow Pages941// ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed942// it correctly), first as precomposed syllables, and then as conjoining jamo.943// Both sequences should be semantically identical and break the same way.944// precomposed syllables...945characterSelectionData.addElement("\uc0c1");946characterSelectionData.addElement("\ud56d");947characterSelectionData.addElement(" ");948characterSelectionData.addElement("\ud55c");949characterSelectionData.addElement("\uc778");950characterSelectionData.addElement(" ");951characterSelectionData.addElement("\uc5f0");952characterSelectionData.addElement("\ud569");953characterSelectionData.addElement(" ");954characterSelectionData.addElement("\uc7a5");955characterSelectionData.addElement("\ub85c");956characterSelectionData.addElement("\uad50");957characterSelectionData.addElement("\ud68c");958characterSelectionData.addElement(" ");959// conjoining 
jamo...960characterSelectionData.addElement("\u1109\u1161\u11bc");961characterSelectionData.addElement("\u1112\u1161\u11bc");962characterSelectionData.addElement(" ");963characterSelectionData.addElement("\u1112\u1161\u11ab");964characterSelectionData.addElement("\u110b\u1175\u11ab");965characterSelectionData.addElement(" ");966characterSelectionData.addElement("\u110b\u1167\u11ab");967characterSelectionData.addElement("\u1112\u1161\u11b8");968characterSelectionData.addElement(" ");969characterSelectionData.addElement("\u110c\u1161\u11bc");970characterSelectionData.addElement("\u1105\u1169");971characterSelectionData.addElement("\u1100\u116d");972characterSelectionData.addElement("\u1112\u116c");973974generalIteratorTest(characterBreak, characterSelectionData);975}976977public void TestBug4153072() {978BreakIterator iter = BreakIterator.getWordInstance();979String str = "...Hello, World!...";980int begin = 3;981int end = str.length() - 3;982boolean gotException = false;983boolean dummy;984985iter.setText(new StringCharacterIterator(str, begin, end, begin));986for (int index = -1; index < begin + 1; ++index) {987try {988dummy = iter.isBoundary(index);989if (index < begin)990errln("Didn't get exception with offset = " + index +991" and begin index = " + begin);992}993catch (IllegalArgumentException e) {994if (index >= begin)995errln("Got exception with offset = " + index +996" and begin index = " + begin);997}998}999}10001001public void TestBug4146175Sentences() {1002Vector<String> sentenceSelectionData = new Vector<String>();10031004// break between periods and opening punctuation even when there's no1005// intervening space1006sentenceSelectionData.addElement("end.");1007sentenceSelectionData.addElement("(This is\u2029");10081009// treat the fullwidth period as an unambiguous sentence terminator1010sentenceSelectionData.addElement("\u7d42\u308f\u308a\uff0e");1011sentenceSelectionData.addElement("\u300c\u3053\u308c\u306f");10121013generalIteratorTest(sentenceBreak, 
sentenceSelectionData);1014}10151016public void TestBug4146175Lines() {1017if (Locale.getDefault().getLanguage().equals("th")) {1018logln("This test is skipped in th locale.");1019return;1020}10211022Vector<String> lineSelectionData = new Vector<String>();10231024// the fullwidth comma should stick to the preceding Japanese character1025lineSelectionData.addElement("\u7d42\uff0c");1026lineSelectionData.addElement("\u308f");10271028generalIteratorTest(lineBreak, lineSelectionData);1029}10301031public void TestBug4214367() {1032if (Locale.getDefault().getLanguage().equals("th")) {1033logln("This test is skipped in th locale.");1034return;1035}10361037Vector<String> wordSelectionData = new Vector<String>();10381039// the hiragana and katakana iteration marks and the long vowel mark1040// are not being treated correctly by the word-break iterator1041wordSelectionData.addElement("\u3042\u3044\u309d\u3042\u309e\u3042\u30fc\u3042");1042wordSelectionData.addElement("\u30a2\u30a4\u30fd\u30a2\u30fe\u30a2\u30fc\u30a2");10431044generalIteratorTest(wordBreak, wordSelectionData);1045}10461047private static final String cannedTestChars // characters fo the class Cc are ignorable for breaking1048= /*"\u0000\u0001\u0002\u0003\u0004*/" !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"1049+ "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"1050+ "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"1051+ "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"1052+ "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"1053+ "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";10541055public void TestSentenceInvariants()1056{1057BreakIterator e = BreakIterator.getSentenceInstance();1058doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");1059}10601061public void TestWordInvariants()1062{1063if 
(Locale.getDefault().getLanguage().equals("th")) {1064logln("This test is skipped in th locale.");1065return;1066}10671068BreakIterator e = BreakIterator.getWordInstance();1069doBreakInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"1070+ "\u30a3\u4e00\u4e01\u4e02");1071doOtherInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"1072+ "\u30a3\u4e00\u4e01\u4e02");1073}10741075public void TestLineInvariants()1076{1077if (Locale.getDefault().getLanguage().equals("th")) {1078logln("This test is skipped in th locale.");1079return;1080}10811082BreakIterator e = BreakIterator.getLineInstance();1083String testChars = cannedTestChars + ".,;:\u3001\u3002\u3041\u3042\u3043\u3044\u3045"1084+ "\u30a3\u4e00\u4e01\u4e02";1085doBreakInvariantTest(e, testChars);1086doOtherInvariantTest(e, testChars);10871088int errorCount = 0;10891090// in addition to the other invariants, a line-break iterator should make sure that:1091// it doesn't break around the non-breaking characters1092String noBreak = "\u00a0\u2007\u2011\ufeff";1093StringBuffer work = new StringBuffer("aaa");1094for (int i = 0; i < testChars.length(); i++) {1095char c = testChars.charAt(i);1096if (c == '\r' || c == '\n' || c == '\u2029' || c == '\u2028' || c == '\u0003')1097continue;1098work.setCharAt(0, c);1099for (int j = 0; j < noBreak.length(); j++) {1100work.setCharAt(1, noBreak.charAt(j));1101for (int k = 0; k < testChars.length(); k++) {1102work.setCharAt(2, testChars.charAt(k));1103// CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored1104// for breaking purposes as per UTR141105int type1 = Character.getType(work.charAt(1));1106int type2 = Character.getType(work.charAt(2));1107if (type1 == Character.CONTROL || type1 == Character.FORMAT ||1108type2 == Character.CONTROL || type2 == Character.FORMAT) {1109continue;1110}1111e.setText(work.toString());1112for (int l = e.first(); l != BreakIterator.DONE; l = e.next()) {1113if (l == 1 || l == 2) 
{1114//errln("Got break between U+" + Integer.toHexString((int)1115// (work.charAt(l - 1))) + " and U+" + Integer.toHexString(1116// (int)(work.charAt(l))) + "\ntype1 = " + type1 + "\ntype2 = " + type2);1117// as per UTR14 spaces followed by a GLUE character should allow1118// line breaking1119if (work.charAt(l-1) == '\u0020' && (work.charAt(l) == '\u00a0' ||1120work.charAt(l) == '\u0f0c' ||1121work.charAt(l) == '\u2007' ||1122work.charAt(l) == '\u2011' ||1123work.charAt(l) == '\u202f' ||1124work.charAt(l) == '\ufeff')) {1125continue;1126}1127errln("Got break between U+" + Integer.toHexString((int)1128(work.charAt(l - 1))) + " and U+" + Integer.toHexString(1129(int)(work.charAt(l))));1130errorCount++;1131if (errorCount >= 75)1132return;1133}1134}1135}1136}1137}11381139// The following test has so many exceptions that it would be better to write a new set of data1140// that tested exactly what should be tested1141// Until that point it will be commented out1142/*11431144// it does break after dashes (unless they're followed by a digit, a non-spacing mark,1145// a currency symbol, a space, a format-control character, a regular control character,1146// a line or paragraph separator, or another dash)1147String dashes = "-\u00ad\u2010\u2012\u2013\u2014";1148for (int i = 0; i < testChars.length(); i++) {1149work.setCharAt(0, testChars.charAt(i));1150for (int j = 0; j < dashes.length(); j++) {1151work.setCharAt(1, dashes.charAt(j));1152for (int k = 0; k < testChars.length(); k++) {1153char c = testChars.charAt(k);1154if (Character.getType(c) == Character.DECIMAL_DIGIT_NUMBER ||1155Character.getType(c) == Character.OTHER_NUMBER ||1156Character.getType(c) == Character.NON_SPACING_MARK ||1157Character.getType(c) == Character.ENCLOSING_MARK ||1158Character.getType(c) == Character.CURRENCY_SYMBOL ||1159Character.getType(c) == Character.DASH_PUNCTUATION ||1160Character.getType(c) == Character.SPACE_SEPARATOR ||1161Character.getType(c) == Character.FORMAT 
||1162Character.getType(c) == Character.CONTROL ||1163Character.getType(c) == Character.END_PUNCTUATION ||1164Character.getType(c) == Character.FINAL_QUOTE_PUNCTUATION ||1165Character.getType(c) == Character.OTHER_PUNCTUATION ||1166c == '\'' || c == '\"' ||1167// category EX as per UTR141168c == '!' || c == '?' || c == '\ufe56' || c == '\ufe57' || c == '\uff01' || c == '\uff1f' ||1169c == '\n' || c == '\r' || c == '\u2028' || c == '\u2029' ||1170c == '\u0003' || c == '\u2007' || c == '\u2011' ||1171c == '\ufeff')1172continue;1173work.setCharAt(2, c);1174e.setText(work.toString());1175boolean saw2 = false;1176for (int l = e.first(); l != BreakIterator.DONE; l = e.next())1177if (l == 2)1178saw2 = true;1179if (!saw2) {1180errln("Didn't get break between U+" + Integer.toHexString((int)1181(work.charAt(1))) + " and U+" + Integer.toHexString(1182(int)(work.charAt(2))));1183errorCount++;1184if (errorCount >= 75)1185return;1186}1187}1188}1189}1190*/1191}11921193public void TestCharacterInvariants()1194{1195BreakIterator e = BreakIterator.getCharacterInstance();1196doBreakInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"1197+ "\u11a9\u11aa");1198doOtherInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"1199+ "\u11a9\u11aa");1200}12011202public void TestEmptyString()1203{1204String text = "";1205Vector<String> x = new Vector<String>();1206x.addElement(text);12071208generalIteratorTest(lineBreak, x);1209}12101211public void TestGetAvailableLocales()1212{1213Locale[] locList = BreakIterator.getAvailableLocales();12141215if (locList.length == 0)1216errln("getAvailableLocales() returned an empty list!");1217// I have no idea how to test this function...1218}121912201221/**1222* Bug 40953221223*/1224public void TestJapaneseLineBreak()1225{1226StringBuffer testString = new StringBuffer("\u4e00x\u4e8c");1227// Breaking on <Kanji>$<Kanji> is inconsistent12281229/* Characters in precedingChars and followingChars have been 
updated1230* from Unicode 2.0.14-based to 3.0.0-based when 4638433 was fixed.1231* In concrete terms,1232* 0x301F : Its category was changed from Ps to Pe since Unicode 2.1.1233* 0x169B & 0x169C : added since Unicode 3.0.0.1234*/1235String precedingChars =1236/* Puctuation, Open */1237"([{\u201a\u201e\u2045\u207d\u208d\u2329\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41\ufe43\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff62\u169b"1238/* Punctuation, Initial quote */1239+ "\u00ab\u2018\u201b\u201c\u201f\u2039"1240/* Symbol, Currency */1241+ "\u00a5\u00a3\u00a4\u20a0";12421243String followingChars =1244/* Puctuation, Close */1245")]}\u2046\u207e\u208e\u232a\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e\u301f\ufd3e\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42\ufe44\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff63\u169c"1246/* Punctuation, Final quote */1247+ "\u00bb\u2019\u201d\u203a"1248/* Punctuation, Other */1249+ "!%,.:;\u3001\u3002\u2030\u2031\u2032\u2033\u2034"1250/* Punctuation, Dash */1251+ "\u2103\u2109"1252/* Symbol, Currency */1253+ "\u00a2"1254/* Letter, Modifier */1255+ "\u3005\u309d\u309e"1256/* Letter, Other */1257+ "\u3063\u3083\u3085\u3087\u30c3\u30e3\u30e5\u30e7\u30fc\u30fd\u30fe"1258/* Mark, Non-Spacing */1259+ "\u0300\u0301\u0302"1260/* Symbol, Modifier */1261+ "\u309b\u309c"1262/* Symbol, Other */1263+ "\u00b0";12641265BreakIterator iter = BreakIterator.getLineInstance(Locale.JAPAN);12661267for (int i = 0; i < precedingChars.length(); i++) {1268testString.setCharAt(1, precedingChars.charAt(i));1269iter.setText(testString.toString());1270int j = iter.first();1271if (j != 0) {1272errln("ja line break failure: failed to start at 0 and bounced at " + j);1273}1274j = iter.next();1275if (j != 1) {1276errln("ja line break failure: failed to stop before '"1277+ precedingChars.charAt(i) + "' (\\u"1278+ Integer.toString(precedingChars.charAt(i), 16)1279+ ") at 1 and bounded at " + j);1280}1281j = 
iter.next();1282if (j != 3) {1283errln("ja line break failure: failed to skip position after '"1284+ precedingChars.charAt(i) + "' (\\u"1285+ Integer.toString(precedingChars.charAt(i), 16)1286+ ") at 3 and bounded at " + j);1287}1288}12891290for (int i = 0; i < followingChars.length(); i++) {1291testString.setCharAt(1, followingChars.charAt(i));1292iter.setText(testString.toString());1293int j = iter.first();1294if (j != 0) {1295errln("ja line break failure: failed to start at 0 and bounded at " + j);1296}1297j = iter.next();1298if (j != 2) {1299errln("ja line break failure: failed to skip position before '"1300+ followingChars.charAt(i) + "' (\\u"1301+ Integer.toString(followingChars.charAt(i), 16)1302+ ") at 2 and bounded at " + j);1303}1304j = iter.next();1305if (j != 3) {1306errln("ja line break failure: failed to stop after '"1307+ followingChars.charAt(i) + "' (\\u"1308+ Integer.toString(followingChars.charAt(i), 16)1309+ ") at 3 and bounded at " + j);1310}1311}1312}13131314/**1315* Bug 46384331316*/1317public void TestLineBreakBasedOnUnicode3_0_0()1318{1319BreakIterator iter;1320int i;13211322/* Latin Extend-B characters1323* 0x0218-0x0233 which have been added since Unicode 3.0.0.1324*/1325iter = BreakIterator.getWordInstance(Locale.US);1326iter.setText("\u0216\u0217\u0218\u0219\u021A");1327i = iter.first();1328i = iter.next();1329if (i != 5) {1330errln("Word break failure: failed to stop at 5 and bounded at " + i);1331}133213331334iter = BreakIterator.getLineInstance(Locale.US);13351336/* <Three(Nd)><Two(Nd)><Low Double Prime Quotation Mark(Pe)><One(Nd)>1337* \u301f has changed its category from Ps to Pe since Unicode 2.1.1338*/1339iter.setText("32\u301f1");1340i = iter.first();1341i = iter.next();1342if (i != 3) {1343errln("Line break failure: failed to skip before \\u301F(Pe) at 3 and bounded at " + i);1344}13451346/* Mongolian <Letter A(Lo)><Todo Soft Hyphen(Pd)><Letter E(Lo)>1347* which have been added since Unicode 
3.0.0.1348*/1349iter.setText("\u1820\u1806\u1821");1350i = iter.first();1351i = iter.next();1352if (i != 2) {1353errln("Mongolian line break failure: failed to skip position before \\u1806(Pd) at 2 and bounded at " + i);1354}13551356/* Khmer <ZERO(Nd)><Currency Symbol(Sc)><ONE(Nd)> which have1357* been added since Unicode 3.0.0.1358*/1359iter.setText("\u17E0\u17DB\u17E1");1360i = iter.first();1361i = iter.next();1362if (i != 1) {1363errln("Khmer line break failure: failed to stop before \\u17DB(Sc) at 1 and bounded at " + i);1364}1365i = iter.next();1366if (i != 3) {1367errln("Khmer line break failure: failed to skip position after \\u17DB(Sc) at 3 and bounded at " + i);1368}13691370/* Ogham <Letter UR(Lo)><Space Mark(Zs)><Letter OR(Lo)> which have1371* been added since Unicode 3.0.0.1372*/1373iter.setText("\u1692\u1680\u1696");1374i = iter.first();1375i = iter.next();1376if (i != 2) {1377errln("Ogham line break failure: failed to skip postion before \\u1680(Zs) at 2 and bounded at " + i);1378}137913801381// Confirm changes in BreakIteratorRules_th.java have been reflected.1382iter = BreakIterator.getLineInstance(new Locale("th", ""));13831384/* Thai <Seven(Nd)>1385* <Left Double Quotation Mark(Pi)>1386* <Five(Nd)>1387* <Right Double Quotation Mark(Pf)>1388* <Three(Nd)>1389*/1390iter.setText("\u0E57\u201C\u0E55\u201D\u0E53");1391i = iter.first();1392i = iter.next();1393if (i != 1) {1394errln("Thai line break failure: failed to stop before \\u201C(Pi) at 1 and bounded at " + i);1395}1396i = iter.next();1397if (i != 4) {1398errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + i);1399}1400}14011402/**1403* Bug 40681371404*/1405public void TestEndBehavior()1406{1407String testString = "boo.";1408BreakIterator wb = BreakIterator.getWordInstance();1409wb.setText(testString);14101411if (wb.first() != 0)1412errln("Didn't get break at beginning of string.");1413if (wb.next() != 3)1414errln("Didn't get break before period in 
\"boo.\"");1415if (wb.current() != 4 && wb.next() != 4)1416errln("Didn't get break at end of string.");1417}14181419// [serialization test has been removed pursuant to bug #4152965]14201421/**1422* Bug 44508041423*/1424public void TestLineBreakContractions() {1425Vector<String> expected = new Vector<String>();14261427expected.add("These ");1428expected.add("are ");1429expected.add("'foobles'. ");1430expected.add("Don't ");1431expected.add("you ");1432expected.add("like ");1433expected.add("them?");1434generalIteratorTest(lineBreak, expected);1435}14361437}143814391440