Path: blob/master/thirdparty/icu4c/common/localebuilder.cpp
9903 views
// © 2019 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html23#include <optional>4#include <string_view>5#include <utility>67#include "bytesinkutil.h" // StringByteSink<CharString>8#include "charstr.h"9#include "cstring.h"10#include "ulocimp.h"11#include "unicode/localebuilder.h"12#include "unicode/locid.h"1314namespace {1516inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }17inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }1819constexpr const char* kAttributeKey = "attribute";2021bool _isExtensionSubtags(char key, const char* s, int32_t len) {22switch (uprv_tolower(key)) {23case 'u':24return ultag_isUnicodeExtensionSubtags(s, len);25case 't':26return ultag_isTransformedExtensionSubtags(s, len);27case 'x':28return ultag_isPrivateuseValueSubtags(s, len);29default:30return ultag_isExtensionSubtags(s, len);31}32}3334} // namespace3536U_NAMESPACE_BEGIN3738LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),39script_(), region_(), variant_(nullptr), extensions_(nullptr)40{41language_[0] = 0;42script_[0] = 0;43region_[0] = 0;44}4546LocaleBuilder::~LocaleBuilder()47{48delete variant_;49delete extensions_;50}5152LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)53{54clear();55setLanguage(locale.getLanguage());56setScript(locale.getScript());57setRegion(locale.getCountry());58setVariant(locale.getVariant());59extensions_ = locale.clone();60if (extensions_ == nullptr) {61status_ = U_MEMORY_ALLOCATION_ERROR;62}63return *this;64}6566LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)67{68Locale l = Locale::forLanguageTag(tag, status_);69if (U_FAILURE(status_)) { return *this; }70// Because setLocale will reset status_ we need to return71// first if we have error in forLanguageTag.72setLocale(l);73return *this;74}7576namespace {7778void setField(StringPiece input, char* dest, UErrorCode& errorCode,79bool (*test)(const char*, int32_t)) {80if (U_FAILURE(errorCode)) { return; }81if (input.empty()) {82dest[0] = '\0';83} else if (test(input.data(), input.length())) {84uprv_memcpy(dest, input.data(), input.length());85dest[input.length()] = '\0';86} else {87errorCode = U_ILLEGAL_ARGUMENT_ERROR;88}89}9091} // namespace9293LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)94{95setField(language, language_, status_, &ultag_isLanguageSubtag);96return *this;97}9899LocaleBuilder& LocaleBuilder::setScript(StringPiece script)100{101setField(script, script_, status_, &ultag_isScriptSubtag);102return *this;103}104105LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)106{107setField(region, region_, status_, &ultag_isRegionSubtag);108return *this;109}110111namespace {112113void transform(char* data, int32_t len) {114for (int32_t i = 0; i < len; i++, data++) {115if (*data == '_') {116*data = '-';117} else {118*data = uprv_tolower(*data);119}120}121}122123} // namespace124125LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)126{127if (U_FAILURE(status_)) { return *this; }128if (variant.empty()) {129delete variant_;130variant_ = nullptr;131return *this;132}133CharString* new_variant = new CharString(variant, status_);134if (U_FAILURE(status_)) { return *this; }135if (new_variant == nullptr) {136status_ = U_MEMORY_ALLOCATION_ERROR;137return *this;138}139transform(new_variant->data(), new_variant->length());140if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {141delete new_variant;142status_ = U_ILLEGAL_ARGUMENT_ERROR;143return *this;144}145delete variant_;146variant_ = new_variant;147return *this;148}149150namespace {151152bool153_isKeywordValue(const char* key, const char* value, int32_t value_len)154{155if (key[1] == '\0') {156// one char key157return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&158_isExtensionSubtags(key[0], value, value_len));159} else if (uprv_strcmp(key, kAttributeKey) == 0) {160// unicode attributes161return ultag_isUnicodeLocaleAttributes(value, value_len);162}163// otherwise: unicode extension value164// We need to convert from legacy key/value to unicode165// key/value166std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);167std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);168169return unicode_locale_key.has_value() &&170unicode_locale_type.has_value() &&171ultag_isUnicodeLocaleKey(unicode_locale_key->data(),172static_cast<int32_t>(unicode_locale_key->size())) &&173ultag_isUnicodeLocaleType(unicode_locale_type->data(),174static_cast<int32_t>(unicode_locale_type->size()));175}176177void178_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,179Locale& to, bool validate, UErrorCode& errorCode)180{181if (U_FAILURE(errorCode)) { return; }182LocalPointer<icu::StringEnumeration> ownedKeywords;183if (keywords == nullptr) {184ownedKeywords.adoptInstead(from.createKeywords(errorCode));185if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }186keywords = ownedKeywords.getAlias();187}188const char* key;189while ((key = keywords->next(nullptr, errorCode)) != nullptr) {190auto value = from.getKeywordValue<CharString>(key, errorCode);191if (U_FAILURE(errorCode)) { return; }192if (uprv_strcmp(key, kAttributeKey) == 0) {193transform(value.data(), value.length());194}195if (validate &&196!_isKeywordValue(key, value.data(), value.length())) {197errorCode = U_ILLEGAL_ARGUMENT_ERROR;198return;199}200to.setKeywordValue(key, value.data(), errorCode);201if (U_FAILURE(errorCode)) { return; }202}203}204205void206_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)207{208if (U_FAILURE(errorCode)) { return; }209// Clear Unicode attributes210locale.setKeywordValue(kAttributeKey, "", errorCode);211212// Clear all Unicode keyword values213LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));214if (U_FAILURE(errorCode) || iter.isNull()) { return; }215const char* key;216while ((key = iter->next(nullptr, errorCode)) != nullptr) {217locale.setUnicodeKeywordValue(key, nullptr, errorCode);218}219}220221void222_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)223{224if (U_FAILURE(errorCode)) { return; }225// Add the unicode extensions to extensions_226CharString locale_str("und-u-", errorCode);227locale_str.append(value, errorCode);228_copyExtensions(229Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,230locale, false, errorCode);231}232233} // namespace234235LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)236{237if (U_FAILURE(status_)) { return *this; }238if (!UPRV_ISALPHANUM(key)) {239status_ = U_ILLEGAL_ARGUMENT_ERROR;240return *this;241}242CharString value_str(value, status_);243if (U_FAILURE(status_)) { return *this; }244transform(value_str.data(), value_str.length());245if (!value_str.isEmpty() &&246!_isExtensionSubtags(key, value_str.data(), value_str.length())) {247status_ = U_ILLEGAL_ARGUMENT_ERROR;248return *this;249}250if (extensions_ == nullptr) {251extensions_ = Locale::getRoot().clone();252if (extensions_ == nullptr) {253status_ = U_MEMORY_ALLOCATION_ERROR;254return *this;255}256}257if (uprv_tolower(key) != 'u') {258// for t, x and others extension.259extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),260status_);261return *this;262}263_clearUAttributesAndKeyType(*extensions_, status_);264if (U_FAILURE(status_)) { return *this; }265if (!value.empty()) {266_setUnicodeExtensions(*extensions_, value_str, status_);267}268return *this;269}270271LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(272StringPiece key, StringPiece type)273{274if (U_FAILURE(status_)) { return *this; }275if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||276(!type.empty() &&277!ultag_isUnicodeLocaleType(type.data(), type.length()))) {278status_ = U_ILLEGAL_ARGUMENT_ERROR;279return *this;280}281if (extensions_ == nullptr) {282extensions_ = Locale::getRoot().clone();283if (extensions_ == nullptr) {284status_ = U_MEMORY_ALLOCATION_ERROR;285return *this;286}287}288extensions_->setUnicodeKeywordValue(key, type, status_);289return *this;290}291292LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(293StringPiece value)294{295CharString value_str(value, status_);296if (U_FAILURE(status_)) { return *this; }297transform(value_str.data(), value_str.length());298if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {299status_ = U_ILLEGAL_ARGUMENT_ERROR;300return *this;301}302if (extensions_ == nullptr) {303extensions_ = Locale::getRoot().clone();304if (extensions_ == nullptr) {305status_ = U_MEMORY_ALLOCATION_ERROR;306return *this;307}308extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);309return *this;310}311312UErrorCode localErrorCode = U_ZERO_ERROR;313auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);314if (U_FAILURE(localErrorCode)) {315CharString new_attributes(value_str.data(), status_);316// No attributes, set the attribute.317extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);318return *this;319}320321transform(attributes.data(),attributes.length());322const char* start = attributes.data();323const char* limit = attributes.data() + attributes.length();324CharString new_attributes;325bool inserted = false;326while (start < limit) {327if (!inserted) {328int cmp = uprv_strcmp(start, value_str.data());329if (cmp == 0) { return *this; } // Found it in attributes: Just return330if (cmp > 0) {331if (!new_attributes.isEmpty()) new_attributes.append('_', status_);332new_attributes.append(value_str.data(), status_);333inserted = true;334}335}336if (!new_attributes.isEmpty()) {337new_attributes.append('_', status_);338}339new_attributes.append(start, status_);340start += uprv_strlen(start) + 1;341}342if (!inserted) {343if (!new_attributes.isEmpty()) {344new_attributes.append('_', status_);345}346new_attributes.append(value_str.data(), status_);347}348// Not yet in the attributes, set the attribute.349extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);350return *this;351}352353LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(354StringPiece value)355{356CharString value_str(value, status_);357if (U_FAILURE(status_)) { return *this; }358transform(value_str.data(), value_str.length());359if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {360status_ = U_ILLEGAL_ARGUMENT_ERROR;361return *this;362}363if (extensions_ == nullptr) { return *this; }364UErrorCode localErrorCode = U_ZERO_ERROR;365auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);366// get failure, just return367if (U_FAILURE(localErrorCode)) { return *this; }368// Do not have any attributes, just return.369if (attributes.isEmpty()) { return *this; }370371char* p = attributes.data();372// Replace null terminiator in place for _ and - so later373// we can use uprv_strcmp to compare.374for (int32_t i = 0; i < attributes.length(); i++, p++) {375*p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);376}377378const char* start = attributes.data();379const char* limit = attributes.data() + attributes.length();380CharString new_attributes;381bool found = false;382while (start < limit) {383if (uprv_strcmp(start, value_str.data()) == 0) {384found = true;385} else {386if (!new_attributes.isEmpty()) {387new_attributes.append('_', status_);388}389new_attributes.append(start, status_);390}391start += uprv_strlen(start) + 1;392}393// Found the value in attributes, set the attribute.394if (found) {395extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);396}397return *this;398}399400LocaleBuilder& LocaleBuilder::clear()401{402status_ = U_ZERO_ERROR;403language_[0] = 0;404script_[0] = 0;405region_[0] = 0;406delete variant_;407variant_ = nullptr;408clearExtensions();409return *this;410}411412LocaleBuilder& LocaleBuilder::clearExtensions()413{414delete extensions_;415extensions_ = nullptr;416return *this;417}418419Locale makeBogusLocale() {420Locale bogus;421bogus.setToBogus();422return bogus;423}424425void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)426{427if (U_FAILURE(errorCode)) { return; }428LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));429if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {430// Error, or no extensions to copy.431return;432}433if (extensions_ == nullptr) {434extensions_ = Locale::getRoot().clone();435if (extensions_ == nullptr) {436status_ = U_MEMORY_ALLOCATION_ERROR;437return;438}439}440_copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);441}442443Locale LocaleBuilder::build(UErrorCode& errorCode)444{445if (U_FAILURE(errorCode)) {446return makeBogusLocale();447}448if (U_FAILURE(status_)) {449errorCode = status_;450return makeBogusLocale();451}452CharString locale_str(language_, errorCode);453if (uprv_strlen(script_) > 0) {454locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);455}456if (uprv_strlen(region_) > 0) {457locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);458}459if (variant_ != nullptr) {460locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);461}462if (U_FAILURE(errorCode)) {463return makeBogusLocale();464}465Locale product(locale_str.data());466if (extensions_ != nullptr) {467_copyExtensions(*extensions_, nullptr, product, true, errorCode);468}469if (U_FAILURE(errorCode)) {470return makeBogusLocale();471}472return product;473}474475UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {476if (U_FAILURE(outErrorCode)) {477// Do not overwrite the older error code478return true;479}480outErrorCode = status_;481return U_FAILURE(outErrorCode);482}483484U_NAMESPACE_END485486487