// Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/text/Collator.java
// 38829 views
/*1* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425/*26* (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved27* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved28*29* The original version of this source code and documentation is copyrighted30* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These31* materials are provided under terms of a License Agreement between Taligent32* and Sun. This technology is protected by multiple US and International33* patents. 
This notice and attribution to Taligent may not be removed.34* Taligent is a registered trademark of Taligent, Inc.35*36*/3738package java.text;3940import java.lang.ref.SoftReference;41import java.text.spi.CollatorProvider;42import java.util.Locale;43import java.util.ResourceBundle;44import java.util.concurrent.ConcurrentHashMap;45import java.util.concurrent.ConcurrentMap;46import sun.util.locale.provider.LocaleProviderAdapter;47import sun.util.locale.provider.LocaleServiceProviderPool;484950/**51* The <code>Collator</code> class performs locale-sensitive52* <code>String</code> comparison. You use this class to build53* searching and sorting routines for natural language text.54*55* <p>56* <code>Collator</code> is an abstract base class. Subclasses57* implement specific collation strategies. One subclass,58* <code>RuleBasedCollator</code>, is currently provided with59* the Java Platform and is applicable to a wide set of languages. Other60* subclasses may be created to handle more specialized needs.61*62* <p>63* Like other locale-sensitive classes, you can use the static64* factory method, <code>getInstance</code>, to obtain the appropriate65* <code>Collator</code> object for a given locale. 
You will only need66* to look at the subclasses of <code>Collator</code> if you need67* to understand the details of a particular collation strategy or68* if you need to modify that strategy.69*70* <p>71* The following example shows how to compare two strings using72* the <code>Collator</code> for the default locale.73* <blockquote>74* <pre>{@code75* // Compare two strings in the default locale76* Collator myCollator = Collator.getInstance();77* if( myCollator.compare("abc", "ABC") < 0 )78* System.out.println("abc is less than ABC");79* else80* System.out.println("abc is greater than or equal to ABC");81* }</pre>82* </blockquote>83*84* <p>85* You can set a <code>Collator</code>'s <em>strength</em> property86* to determine the level of difference considered significant in87* comparisons. Four strengths are provided: <code>PRIMARY</code>,88* <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.89* The exact assignment of strengths to language features is90* locale dependant. For example, in Czech, "e" and "f" are considered91* primary differences, while "e" and "ě" are secondary differences,92* "e" and "E" are tertiary differences and "e" and "e" are identical.93* The following shows how both case and accents could be ignored for94* US English.95* <blockquote>96* <pre>97* //Get the Collator for US English and set its strength to PRIMARY98* Collator usCollator = Collator.getInstance(Locale.US);99* usCollator.setStrength(Collator.PRIMARY);100* if( usCollator.compare("abc", "ABC") == 0 ) {101* System.out.println("Strings are equivalent");102* }103* </pre>104* </blockquote>105* <p>106* For comparing <code>String</code>s exactly once, the <code>compare</code>107* method provides the best performance. When sorting a list of108* <code>String</code>s however, it is generally necessary to compare each109* <code>String</code> multiple times. In this case, <code>CollationKey</code>s110* provide better performance. 
The <code>CollationKey</code> class converts111* a <code>String</code> to a series of bits that can be compared bitwise112* against other <code>CollationKey</code>s. A <code>CollationKey</code> is113* created by a <code>Collator</code> object for a given <code>String</code>.114* <br>115* <strong>Note:</strong> <code>CollationKey</code>s from different116* <code>Collator</code>s can not be compared. See the class description117* for {@link CollationKey}118* for an example using <code>CollationKey</code>s.119*120* @see RuleBasedCollator121* @see CollationKey122* @see CollationElementIterator123* @see Locale124* @author Helena Shih, Laura Werner, Richard Gillam125*/126127public abstract class Collator128implements java.util.Comparator<Object>, Cloneable129{130/**131* Collator strength value. When set, only PRIMARY differences are132* considered significant during comparison. The assignment of strengths133* to language features is locale dependant. A common example is for134* different base letters ("a" vs "b") to be considered a PRIMARY difference.135* @see java.text.Collator#setStrength136* @see java.text.Collator#getStrength137*/138public final static int PRIMARY = 0;139/**140* Collator strength value. When set, only SECONDARY and above differences are141* considered significant during comparison. The assignment of strengths142* to language features is locale dependant. A common example is for143* different accented forms of the same base letter ("a" vs "\u00E4") to be144* considered a SECONDARY difference.145* @see java.text.Collator#setStrength146* @see java.text.Collator#getStrength147*/148public final static int SECONDARY = 1;149/**150* Collator strength value. When set, only TERTIARY and above differences are151* considered significant during comparison. The assignment of strengths152* to language features is locale dependant. 
A common example is for153* case differences ("a" vs "A") to be considered a TERTIARY difference.154* @see java.text.Collator#setStrength155* @see java.text.Collator#getStrength156*/157public final static int TERTIARY = 2;158159/**160* Collator strength value. When set, all differences are161* considered significant during comparison. The assignment of strengths162* to language features is locale dependant. A common example is for control163* characters ("\u0001" vs "\u0002") to be considered equal at the164* PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL165* level. Additionally, differences between pre-composed accents such as166* "\u00C0" (A-grave) and combining accents such as "A\u0300"167* (A, combining-grave) will be considered significant at the IDENTICAL168* level if decomposition is set to NO_DECOMPOSITION.169*/170public final static int IDENTICAL = 3;171172/**173* Decomposition mode value. With NO_DECOMPOSITION174* set, accented characters will not be decomposed for collation. This175* is the default setting and provides the fastest collation but176* will only produce correct results for languages that do not use accents.177* @see java.text.Collator#getDecomposition178* @see java.text.Collator#setDecomposition179*/180public final static int NO_DECOMPOSITION = 0;181182/**183* Decomposition mode value. With CANONICAL_DECOMPOSITION184* set, characters that are canonical variants according to Unicode185* standard will be decomposed for collation. This should be used to get186* correct collation of accented characters.187* <p>188* CANONICAL_DECOMPOSITION corresponds to Normalization Form D as189* described in190* <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode191* Technical Report #15</a>.192* @see java.text.Collator#getDecomposition193* @see java.text.Collator#setDecomposition194*/195public final static int CANONICAL_DECOMPOSITION = 1;196197/**198* Decomposition mode value. 
With FULL_DECOMPOSITION199* set, both Unicode canonical variants and Unicode compatibility variants200* will be decomposed for collation. This causes not only accented201* characters to be collated, but also characters that have special formats202* to be collated with their norminal form. For example, the half-width and203* full-width ASCII and Katakana characters are then collated together.204* FULL_DECOMPOSITION is the most complete and therefore the slowest205* decomposition mode.206* <p>207* FULL_DECOMPOSITION corresponds to Normalization Form KD as208* described in209* <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode210* Technical Report #15</a>.211* @see java.text.Collator#getDecomposition212* @see java.text.Collator#setDecomposition213*/214public final static int FULL_DECOMPOSITION = 2;215216/**217* Gets the Collator for the current default locale.218* The default locale is determined by java.util.Locale.getDefault.219* @return the Collator for the default locale.(for example, en_US)220* @see java.util.Locale#getDefault221*/222public static synchronized Collator getInstance() {223return getInstance(Locale.getDefault());224}225226/**227* Gets the Collator for the desired locale.228* @param desiredLocale the desired locale.229* @return the Collator for the desired locale.230* @see java.util.Locale231* @see java.util.ResourceBundle232*/233public static Collator getInstance(Locale desiredLocale) {234SoftReference<Collator> ref = cache.get(desiredLocale);235Collator result = (ref != null) ? 
ref.get() : null;236if (result == null) {237LocaleProviderAdapter adapter;238adapter = LocaleProviderAdapter.getAdapter(CollatorProvider.class,239desiredLocale);240CollatorProvider provider = adapter.getCollatorProvider();241result = provider.getInstance(desiredLocale);242if (result == null) {243result = LocaleProviderAdapter.forJRE()244.getCollatorProvider().getInstance(desiredLocale);245}246while (true) {247if (ref != null) {248// Remove the empty SoftReference if any249cache.remove(desiredLocale, ref);250}251ref = cache.putIfAbsent(desiredLocale, new SoftReference<>(result));252if (ref == null) {253break;254}255Collator cachedColl = ref.get();256if (cachedColl != null) {257result = cachedColl;258break;259}260}261}262return (Collator) result.clone(); // make the world safe263}264265/**266* Compares the source string to the target string according to the267* collation rules for this Collator. Returns an integer less than,268* equal to or greater than zero depending on whether the source String is269* less than, equal to or greater than the target string. See the Collator270* class description for an example of use.271* <p>272* For a one time comparison, this method has the best performance. If a273* given String will be involved in multiple comparisons, CollationKey.compareTo274* has the best performance. See the Collator class description for an example275* using CollationKeys.276* @param source the source string.277* @param target the target string.278* @return Returns an integer value. Value is less than zero if source is less than279* target, value is zero if source and target are equal, value is greater than zero280* if source is greater than target.281* @see java.text.CollationKey282* @see java.text.Collator#getCollationKey283*/284public abstract int compare(String source, String target);285286/**287* Compares its two arguments for order. 
Returns a negative integer,288* zero, or a positive integer as the first argument is less than, equal289* to, or greater than the second.290* <p>291* This implementation merely returns292* <code> compare((String)o1, (String)o2) </code>.293*294* @return a negative integer, zero, or a positive integer as the295* first argument is less than, equal to, or greater than the296* second.297* @exception ClassCastException the arguments cannot be cast to Strings.298* @see java.util.Comparator299* @since 1.2300*/301@Override302public int compare(Object o1, Object o2) {303return compare((String)o1, (String)o2);304}305306/**307* Transforms the String into a series of bits that can be compared bitwise308* to other CollationKeys. CollationKeys provide better performance than309* Collator.compare when Strings are involved in multiple comparisons.310* See the Collator class description for an example using CollationKeys.311* @param source the string to be transformed into a collation key.312* @return the CollationKey for the given String based on this Collator's collation313* rules. If the source String is null, a null CollationKey is returned.314* @see java.text.CollationKey315* @see java.text.Collator#compare316*/317public abstract CollationKey getCollationKey(String source);318319/**320* Convenience method for comparing the equality of two strings based on321* this Collator's collation rules.322* @param source the source string to be compared with.323* @param target the target string to be compared with.324* @return true if the strings are equal according to the collation325* rules. false, otherwise.326* @see java.text.Collator#compare327*/328public boolean equals(String source, String target)329{330return (compare(source, target) == Collator.EQUAL);331}332333/**334* Returns this Collator's strength property. 
The strength property determines335* the minimum level of difference considered significant during comparison.336* See the Collator class description for an example of use.337* @return this Collator's current strength property.338* @see java.text.Collator#setStrength339* @see java.text.Collator#PRIMARY340* @see java.text.Collator#SECONDARY341* @see java.text.Collator#TERTIARY342* @see java.text.Collator#IDENTICAL343*/344public synchronized int getStrength()345{346return strength;347}348349/**350* Sets this Collator's strength property. The strength property determines351* the minimum level of difference considered significant during comparison.352* See the Collator class description for an example of use.353* @param newStrength the new strength value.354* @see java.text.Collator#getStrength355* @see java.text.Collator#PRIMARY356* @see java.text.Collator#SECONDARY357* @see java.text.Collator#TERTIARY358* @see java.text.Collator#IDENTICAL359* @exception IllegalArgumentException If the new strength value is not one of360* PRIMARY, SECONDARY, TERTIARY or IDENTICAL.361*/362public synchronized void setStrength(int newStrength) {363if ((newStrength != PRIMARY) &&364(newStrength != SECONDARY) &&365(newStrength != TERTIARY) &&366(newStrength != IDENTICAL)) {367throw new IllegalArgumentException("Incorrect comparison level.");368}369strength = newStrength;370}371372/**373* Get the decomposition mode of this Collator. Decomposition mode374* determines how Unicode composed characters are handled. 
Adjusting375* decomposition mode allows the user to select between faster and more376* complete collation behavior.377* <p>The three values for decomposition mode are:378* <UL>379* <LI>NO_DECOMPOSITION,380* <LI>CANONICAL_DECOMPOSITION381* <LI>FULL_DECOMPOSITION.382* </UL>383* See the documentation for these three constants for a description384* of their meaning.385* @return the decomposition mode386* @see java.text.Collator#setDecomposition387* @see java.text.Collator#NO_DECOMPOSITION388* @see java.text.Collator#CANONICAL_DECOMPOSITION389* @see java.text.Collator#FULL_DECOMPOSITION390*/391public synchronized int getDecomposition()392{393return decmp;394}395/**396* Set the decomposition mode of this Collator. See getDecomposition397* for a description of decomposition mode.398* @param decompositionMode the new decomposition mode.399* @see java.text.Collator#getDecomposition400* @see java.text.Collator#NO_DECOMPOSITION401* @see java.text.Collator#CANONICAL_DECOMPOSITION402* @see java.text.Collator#FULL_DECOMPOSITION403* @exception IllegalArgumentException If the given value is not a valid decomposition404* mode.405*/406public synchronized void setDecomposition(int decompositionMode) {407if ((decompositionMode != NO_DECOMPOSITION) &&408(decompositionMode != CANONICAL_DECOMPOSITION) &&409(decompositionMode != FULL_DECOMPOSITION)) {410throw new IllegalArgumentException("Wrong decomposition mode.");411}412decmp = decompositionMode;413}414415/**416* Returns an array of all locales for which the417* <code>getInstance</code> methods of this class can return418* localized instances.419* The returned array represents the union of locales supported420* by the Java runtime and by installed421* {@link java.text.spi.CollatorProvider CollatorProvider} implementations.422* It must contain at least a Locale instance equal to423* {@link java.util.Locale#US Locale.US}.424*425* @return An array of locales for which localized426* <code>Collator</code> instances are 
available.427*/428public static synchronized Locale[] getAvailableLocales() {429LocaleServiceProviderPool pool =430LocaleServiceProviderPool.getPool(CollatorProvider.class);431return pool.getAvailableLocales();432}433434/**435* Overrides Cloneable436*/437@Override438public Object clone()439{440try {441return (Collator)super.clone();442} catch (CloneNotSupportedException e) {443throw new InternalError(e);444}445}446447/**448* Compares the equality of two Collators.449* @param that the Collator to be compared with this.450* @return true if this Collator is the same as that Collator;451* false otherwise.452*/453@Override454public boolean equals(Object that)455{456if (this == that) {457return true;458}459if (that == null) {460return false;461}462if (getClass() != that.getClass()) {463return false;464}465Collator other = (Collator) that;466return ((strength == other.strength) &&467(decmp == other.decmp));468}469470/**471* Generates the hash code for this Collator.472*/473@Override474abstract public int hashCode();475476/**477* Default constructor. This constructor is478* protected so subclasses can get access to it. 
Users typically create479* a Collator sub-class by calling the factory method getInstance.480* @see java.text.Collator#getInstance481*/482protected Collator()483{484strength = TERTIARY;485decmp = CANONICAL_DECOMPOSITION;486}487488private int strength = 0;489private int decmp = 0;490private static final ConcurrentMap<Locale, SoftReference<Collator>> cache491= new ConcurrentHashMap<>();492493//494// FIXME: These three constants should be removed.495//496/**497* LESS is returned if source string is compared to be less than target498* string in the compare() method.499* @see java.text.Collator#compare500*/501final static int LESS = -1;502/**503* EQUAL is returned if source string is compared to be equal to target504* string in the compare() method.505* @see java.text.Collator#compare506*/507final static int EQUAL = 0;508/**509* GREATER is returned if source string is compared to be greater than510* target string in the compare() method.511* @see java.text.Collator#compare512*/513final static int GREATER = 1;514}515516517