Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/localematcher.h
38827 views
// © 2019 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html#License23// localematcher.h4// created: 2019may08 Markus W. Scherer56#ifndef __LOCALEMATCHER_H__7#define __LOCALEMATCHER_H__89#include "unicode/utypes.h"1011#if U_SHOW_CPLUSPLUS_API1213#include "unicode/locid.h"14#include "unicode/stringpiece.h"15#include "unicode/uobject.h"1617/**18* \file19* \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.20*/2122#ifndef U_HIDE_DRAFT_API2324/**25* Builder option for whether the language subtag or the script subtag is most important.26*27* @see Builder#setFavorSubtag(FavorSubtag)28* @draft ICU 6529*/30enum ULocMatchFavorSubtag {31/**32* Language differences are most important, then script differences, then region differences.33* (This is the default behavior.)34*35* @draft ICU 6536*/37ULOCMATCH_FAVOR_LANGUAGE,38/**39* Makes script differences matter relatively more than language differences.40*41* @draft ICU 6542*/43ULOCMATCH_FAVOR_SCRIPT44};45#ifndef U_IN_DOXYGEN46typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;47#endif4849/**50* Builder option for whether all desired locales are treated equally or51* earlier ones are preferred.52*53* @see Builder#setDemotionPerDesiredLocale(Demotion)54* @draft ICU 6555*/56enum ULocMatchDemotion {57/**58* All desired locales are treated equally.59*60* @draft ICU 6561*/62ULOCMATCH_DEMOTION_NONE,63/**64* Earlier desired locales are preferred.65*66* <p>From each desired locale to the next,67* the distance to any supported locale is increased by an additional amount68* which is at least as large as most region mismatches.69* A later desired locale has to have a better match with some supported locale70* due to more than merely having the same region subtag.71*72* <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>73* yields <code>Result(en-GB, en)</code> because74* with the demotion of sv its perfect match is no better than75* the region distance between the earlier desired locale en-GB and en=en-US.76*77* <p>Notes:78* <ul>79* <li>In some cases, language and/or script differences can be as small as80* the typical region difference. (Example: sr-Latn vs. sr-Cyrl)81* <li>It is possible for certain region differences to be larger than usual,82* and larger than the demotion.83* (As of CLDR 35 there is no such case, but84* this is possible in future versions of the data.)85* </ul>86*87* @draft ICU 6588*/89ULOCMATCH_DEMOTION_REGION90};91#ifndef U_IN_DOXYGEN92typedef enum ULocMatchDemotion ULocMatchDemotion;93#endif9495struct UHashtable;9697U_NAMESPACE_BEGIN9899struct LSR;100101class LocaleDistance;102class LocaleLsrIterator;103class UVector;104class XLikelySubtags;105106/**107* Immutable class that picks the best match between a user's desired locales and108* an application's supported locales.109* Movable but not copyable.110*111* <p>Example:112* <pre>113* UErrorCode errorCode = U_ZERO_ERROR;114* LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode);115* Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode); // "en"116* </pre>117*118* <p>A matcher takes into account when languages are close to one another,119* such as Danish and Norwegian,120* and when regional variants are close, like en-GB and en-AU as opposed to en-US.121*122* <p>If there are multiple supported locales with the same (language, script, region)123* likely subtags, then the current implementation returns the first of those locales.124* It ignores variant subtags (except for pseudolocale variants) and extensions.125* This may change in future versions.126*127* <p>For example, the current implementation does not distinguish between128* de, de-DE, de-Latn, de-1901, de-u-co-phonebk.129*130* <p>If you prefer one equivalent locale over another, then provide only the preferred one,131* or place it earlier in the list of supported locales.132*133* <p>Otherwise, the order of supported locales may have no effect on the best-match results.134* The current implementation compares each desired locale with supported locales135* in the following order:136* 1. Default locale, if supported;137* 2. CLDR "paradigm locales" like en-GB and es-419;138* 3. other supported locales.139* This may change in future versions.140*141* <p>Often a product will just need one matcher instance, built with the languages142* that it supports. However, it may want multiple instances with different143* default languages based on additional information, such as the domain.144*145* <p>This class is not intended for public subclassing.146*147* @draft ICU 65148*/149class U_COMMON_API LocaleMatcher : public UMemory {150public:151/**152* Data for the best-matching pair of a desired and a supported locale.153* Movable but not copyable.154*155* @draft ICU 65156*/157class U_COMMON_API Result : public UMemory {158public:159/**160* Move constructor; might modify the source.161* This object will have the same contents that the source object had.162*163* @param src Result to move contents from.164* @draft ICU 65165*/166Result(Result &&src) U_NOEXCEPT;167168/**169* Destructor.170*171* @draft ICU 65172*/173~Result();174175/**176* Move assignment; might modify the source.177* This object will have the same contents that the source object had.178*179* @param src Result to move contents from.180* @draft ICU 65181*/182Result &operator=(Result &&src) U_NOEXCEPT;183184/**185* Returns the best-matching desired locale.186* nullptr if the list of desired locales is empty or if none matched well enough.187*188* @return the best-matching desired locale, or nullptr.189* @draft ICU 65190*/191inline const Locale *getDesiredLocale() const { return desiredLocale; }192193/**194* Returns the best-matching supported locale.195* If none matched well enough, this is the default locale.196* The default locale is nullptr if the list of supported locales is empty and197* no explicit default locale is set.198*199* @return the best-matching supported locale, or nullptr.200* @draft ICU 65201*/202inline const Locale *getSupportedLocale() const { return supportedLocale; }203204/**205* Returns the index of the best-matching desired locale in the input Iterable order.206* -1 if the list of desired locales is empty or if none matched well enough.207*208* @return the index of the best-matching desired locale, or -1.209* @draft ICU 65210*/211inline int32_t getDesiredIndex() const { return desiredIndex; }212213/**214* Returns the index of the best-matching supported locale in the215* constructor’s or builder’s input order (“set” Collection plus “added” locales).216* If the matcher was built from a locale list string, then the iteration order is that217* of a LocalePriorityList built from the same string.218* -1 if the list of supported locales is empty or if none matched well enough.219*220* @return the index of the best-matching supported locale, or -1.221* @draft ICU 65222*/223inline int32_t getSupportedIndex() const { return supportedIndex; }224225/**226* Takes the best-matching supported locale and adds relevant fields of the227* best-matching desired locale, such as the -t- and -u- extensions.228* May replace some fields of the supported locale.229* The result is the locale that should be used for date and number formatting, collation, etc.230* Returns the root locale if getSupportedLocale() returns nullptr.231*232* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn233*234* @return a locale combining the best-matching desired and supported locales.235* @draft ICU 65236*/237Locale makeResolvedLocale(UErrorCode &errorCode) const;238239private:240Result(const Locale *desired, const Locale *supported,241int32_t desIndex, int32_t suppIndex, UBool owned) :242desiredLocale(desired), supportedLocale(supported),243desiredIndex(desIndex), supportedIndex(suppIndex),244desiredIsOwned(owned) {}245246Result(const Result &other) = delete;247Result &operator=(const Result &other) = delete;248249const Locale *desiredLocale;250const Locale *supportedLocale;251int32_t desiredIndex;252int32_t supportedIndex;253UBool desiredIsOwned;254255friend class LocaleMatcher;256};257258/**259* LocaleMatcher builder.260* Movable but not copyable.261*262* @see LocaleMatcher#builder()263* @draft ICU 65264*/265class U_COMMON_API Builder : public UMemory {266public:267/**268* Constructs a builder used in chaining parameters for building a LocaleMatcher.269*270* @return a new Builder object271* @draft ICU 65272*/273Builder() {}274275/**276* Move constructor; might modify the source.277* This builder will have the same contents that the source builder had.278*279* @param src Builder to move contents from.280* @draft ICU 65281*/282Builder(Builder &&src) U_NOEXCEPT;283284/**285* Destructor.286*287* @draft ICU 65288*/289~Builder();290291/**292* Move assignment; might modify the source.293* This builder will have the same contents that the source builder had.294*295* @param src Builder to move contents from.296* @draft ICU 65297*/298Builder &operator=(Builder &&src) U_NOEXCEPT;299300/**301* Parses an Accept-Language string302* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),303* such as "af, en, fr;q=0.9", and sets the supported locales accordingly.304* Allows whitespace in more places but does not allow "*".305* Clears any previously set/added supported locales first.306*307* @param locales the Accept-Language string of locales to set308* @return this Builder object309* @draft ICU 65310*/311Builder &setSupportedLocalesFromListString(StringPiece locales);312313/**314* Copies the supported locales, preserving iteration order.315* Clears any previously set/added supported locales first.316* Duplicates are allowed, and are not removed.317*318* @param locales the list of locale319* @return this Builder object320* @draft ICU 65321*/322Builder &setSupportedLocales(Locale::Iterator &locales);323324/**325* Copies the supported locales from the begin/end range, preserving iteration order.326* Clears any previously set/added supported locales first.327* Duplicates are allowed, and are not removed.328*329* Each of the iterator parameter values must be an330* input iterator whose value is convertible to const Locale &.331*332* @param begin Start of range.333* @param end Exclusive end of range.334* @return this Builder object335* @draft ICU 65336*/337template<typename Iter>338Builder &setSupportedLocales(Iter begin, Iter end) {339if (U_FAILURE(errorCode_)) { return *this; }340clearSupportedLocales();341while (begin != end) {342addSupportedLocale(*begin++);343}344return *this;345}346347/**348* Copies the supported locales from the begin/end range, preserving iteration order.349* Calls the converter to convert each *begin to a Locale or const Locale &.350* Clears any previously set/added supported locales first.351* Duplicates are allowed, and are not removed.352*353* Each of the iterator parameter values must be an354* input iterator whose value is convertible to const Locale &.355*356* @param begin Start of range.357* @param end Exclusive end of range.358* @param converter Converter from *begin to const Locale & or compatible.359* @return this Builder object360* @draft ICU 65361*/362template<typename Iter, typename Conv>363Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {364if (U_FAILURE(errorCode_)) { return *this; }365clearSupportedLocales();366while (begin != end) {367addSupportedLocale(converter(*begin++));368}369return *this;370}371372/**373* Adds another supported locale.374* Duplicates are allowed, and are not removed.375*376* @param locale another locale377* @return this Builder object378* @draft ICU 65379*/380Builder &addSupportedLocale(const Locale &locale);381382/**383* Sets the default locale; if nullptr, or if it is not set explicitly,384* then the first supported locale is used as the default locale.385*386* @param defaultLocale the default locale (will be copied)387* @return this Builder object388* @draft ICU 65389*/390Builder &setDefaultLocale(const Locale *defaultLocale);391392/**393* If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script394* differences.395* This is used in situations (such as maps) where396* it is better to fall back to the same script than a similar language.397*398* @param subtag the subtag to favor399* @return this Builder object400* @draft ICU 65401*/402Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);403404/**405* Option for whether all desired locales are treated equally or406* earlier ones are preferred (this is the default).407*408* @param demotion the demotion per desired locale to set.409* @return this Builder object410* @draft ICU 65411*/412Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);413414/**415* Sets the UErrorCode if an error occurred while setting parameters.416* Preserves older error codes in the outErrorCode.417*418* @param outErrorCode Set to an error code if it does not contain one already419* and an error occurred while setting parameters.420* Otherwise unchanged.421* @return TRUE if U_FAILURE(outErrorCode)422* @draft ICU 65423*/424UBool copyErrorTo(UErrorCode &outErrorCode) const;425426/**427* Builds and returns a new locale matcher.428* This builder can continue to be used.429*430* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,431* or else the function returns immediately. Check for U_FAILURE()432* on output or use with function chaining. (See User Guide for details.)433* @return new LocaleMatcher.434* @draft ICU 65435*/436LocaleMatcher build(UErrorCode &errorCode) const;437438private:439friend class LocaleMatcher;440441Builder(const Builder &other) = delete;442Builder &operator=(const Builder &other) = delete;443444void clearSupportedLocales();445bool ensureSupportedLocaleVector();446447UErrorCode errorCode_ = U_ZERO_ERROR;448UVector *supportedLocales_ = nullptr;449int32_t thresholdDistance_ = -1;450ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;451Locale *defaultLocale_ = nullptr;452ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;453};454455// FYI No public LocaleMatcher constructors in C++; use the Builder.456457/**458* Move copy constructor; might modify the source.459* This matcher will have the same settings that the source matcher had.460* @param src source matcher461* @draft ICU 65462*/463LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;464465/**466* Destructor.467* @draft ICU 65468*/469~LocaleMatcher();470471/**472* Move assignment operator; might modify the source.473* This matcher will have the same settings that the source matcher had.474* The behavior is undefined if *this and src are the same object.475* @param src source matcher476* @return *this477* @draft ICU 65478*/479LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;480481/**482* Returns the supported locale which best matches the desired locale.483*484* @param desiredLocale Typically a user's language.485* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,486* or else the function returns immediately. Check for U_FAILURE()487* on output or use with function chaining. (See User Guide for details.)488* @return the best-matching supported locale.489* @draft ICU 65490*/491const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;492493/**494* Returns the supported locale which best matches one of the desired locales.495*496* @param desiredLocales Typically a user's languages, in order of preference (descending).497* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,498* or else the function returns immediately. Check for U_FAILURE()499* on output or use with function chaining. (See User Guide for details.)500* @return the best-matching supported locale.501* @draft ICU 65502*/503const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;504505/**506* Parses an Accept-Language string507* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),508* such as "af, en, fr;q=0.9",509* and returns the supported locale which best matches one of the desired locales.510* Allows whitespace in more places but does not allow "*".511*512* @param desiredLocaleList Typically a user's languages, as an Accept-Language string.513* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,514* or else the function returns immediately. Check for U_FAILURE()515* on output or use with function chaining. (See User Guide for details.)516* @return the best-matching supported locale.517* @draft ICU 65518*/519const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;520521/**522* Returns the best match between the desired locale and the supported locales.523* If the result's desired locale is not nullptr, then it is the address of the input locale.524* It has not been cloned.525*526* @param desiredLocale Typically a user's language.527* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,528* or else the function returns immediately. Check for U_FAILURE()529* on output or use with function chaining. (See User Guide for details.)530* @return the best-matching pair of the desired and a supported locale.531* @draft ICU 65532*/533Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;534535/**536* Returns the best match between the desired and supported locales.537* If the result's desired locale is not nullptr, then it is a clone of538* the best-matching desired locale. The Result object owns the clone.539*540* @param desiredLocales Typically a user's languages, in order of preference (descending).541* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,542* or else the function returns immediately. Check for U_FAILURE()543* on output or use with function chaining. (See User Guide for details.)544* @return the best-matching pair of a desired and a supported locale.545* @draft ICU 65546*/547Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;548549#ifndef U_HIDE_INTERNAL_API550/**551* Returns a fraction between 0 and 1, where 1 means that the languages are a552* perfect match, and 0 means that they are completely different.553*554* <p>This is mostly an implementation detail, and the precise values may change over time.555* The implementation may use either the maximized forms or the others ones, or both.556* The implementation may or may not rely on the forms to be consistent with each other.557*558* <p>Callers should construct and use a matcher rather than match pairs of locales directly.559*560* @param desired Desired locale.561* @param supported Supported locale.562* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,563* or else the function returns immediately. Check for U_FAILURE()564* on output or use with function chaining. (See User Guide for details.)565* @return value between 0 and 1, inclusive.566* @internal (has a known user)567*/568double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;569#endif // U_HIDE_INTERNAL_API570571private:572LocaleMatcher(const Builder &builder, UErrorCode &errorCode);573LocaleMatcher(const LocaleMatcher &other) = delete;574LocaleMatcher &operator=(const LocaleMatcher &other) = delete;575576int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;577578const XLikelySubtags &likelySubtags;579const LocaleDistance &localeDistance;580int32_t thresholdDistance;581int32_t demotionPerDesiredLocale;582ULocMatchFavorSubtag favorSubtag;583584// These are in input order.585const Locale ** supportedLocales;586LSR *lsrs;587int32_t supportedLocalesLength;588// These are in preference order: 1. Default locale 2. paradigm locales 3. others.589UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"590// Array versions of the supportedLsrToIndex keys and values.591// The distance lookup loops over the supportedLSRs and returns the index of the best match.592const LSR **supportedLSRs;593int32_t *supportedIndexes;594int32_t supportedLSRsLength;595Locale *ownedDefaultLocale;596const Locale *defaultLocale;597int32_t defaultLocaleIndex;598};599600U_NAMESPACE_END601602#endif // U_HIDE_DRAFT_API603#endif // U_SHOW_CPLUSPLUS_API604#endif // __LOCALEMATCHER_H__605606607