Path: blob/master/thirdparty/icu4c/common/loadednormalizer2impl.cpp
9902 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3*******************************************************************************4* Copyright (C) 2014, International Business Machines5* Corporation and others. All Rights Reserved.6*******************************************************************************7* loadednormalizer2impl.cpp8*9* created on: 2014sep0310* created by: Markus W. Scherer11*/1213#include "unicode/utypes.h"1415#if !UCONFIG_NO_NORMALIZATION1617#include "unicode/udata.h"18#include "unicode/localpointer.h"19#include "unicode/normalizer2.h"20#include "unicode/ucptrie.h"21#include "unicode/unistr.h"22#include "unicode/unorm.h"23#include "cstring.h"24#include "mutex.h"25#include "norm2allmodes.h"26#include "normalizer2impl.h"27#include "uassert.h"28#include "ucln_cmn.h"29#include "uhash.h"3031U_NAMESPACE_BEGIN3233class LoadedNormalizer2Impl : public Normalizer2Impl {34public:35LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}36virtual ~LoadedNormalizer2Impl();3738void load(const char *packageName, const char *name, UErrorCode &errorCode);3940private:41static UBool U_CALLCONV42isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);4344UDataMemory *memory;45UCPTrie *ownedTrie;46};4748LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {49udata_close(memory);50ucptrie_close(ownedTrie);51}5253UBool U_CALLCONV54LoadedNormalizer2Impl::isAcceptable(void * /*context*/,55const char * /* type */, const char * /*name*/,56const UDataInfo *pInfo) {57if(58pInfo->size>=20 &&59pInfo->isBigEndian==U_IS_BIG_ENDIAN &&60pInfo->charsetFamily==U_CHARSET_FAMILY &&61pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */62pInfo->dataFormat[1]==0x72 &&63pInfo->dataFormat[2]==0x6d &&64pInfo->dataFormat[3]==0x32 &&65pInfo->formatVersion[0]==566) {67// Normalizer2Impl *me=(Normalizer2Impl *)context;68// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);69return true;70} else {71return false;72}73}7475void76LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {77if(U_FAILURE(errorCode)) {78return;79}80memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);81if(U_FAILURE(errorCode)) {82return;83}84const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory));85const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);86int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;87if(indexesLength<=IX_MIN_LCCC_CP) {88errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.89return;90}9192int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];93int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];94ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,95inBytes+offset, nextOffset-offset, nullptr,96&errorCode);97if(U_FAILURE(errorCode)) {98return;99}100101offset=nextOffset;102nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];103const uint16_t* inExtraData = reinterpret_cast<const uint16_t*>(inBytes + offset);104105// smallFCD: new in formatVersion 2106offset=nextOffset;107const uint8_t *inSmallFCD=inBytes+offset;108109init(inIndexes, ownedTrie, inExtraData, inSmallFCD);110}111112// instance cache ---------------------------------------------------------- ***113114Norm2AllModes *115Norm2AllModes::createInstance(const char *packageName,116const char *name,117UErrorCode &errorCode) {118if(U_FAILURE(errorCode)) {119return nullptr;120}121LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;122if(impl==nullptr) {123errorCode=U_MEMORY_ALLOCATION_ERROR;124return nullptr;125}126impl->load(packageName, name, errorCode);127return createInstance(impl, errorCode);128}129130U_CDECL_BEGIN131static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();132U_CDECL_END133134#if !NORM2_HARDCODE_NFC_DATA135static Norm2AllModes *nfcSingleton;136static icu::UInitOnce nfcInitOnce {};137#endif138139static Norm2AllModes *nfkcSingleton;140static icu::UInitOnce nfkcInitOnce {};141142static Norm2AllModes *nfkc_cfSingleton;143static icu::UInitOnce nfkc_cfInitOnce {};144145static Norm2AllModes *nfkc_scfSingleton;146static icu::UInitOnce nfkc_scfInitOnce {};147148static UHashtable *cache=nullptr;149150// UInitOnce singleton initialization function151static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {152#if !NORM2_HARDCODE_NFC_DATA153if (uprv_strcmp(what, "nfc") == 0) {154nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);155} else156#endif157if (uprv_strcmp(what, "nfkc") == 0) {158nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);159} else if (uprv_strcmp(what, "nfkc_cf") == 0) {160nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);161} else if (uprv_strcmp(what, "nfkc_scf") == 0) {162nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);163} else {164UPRV_UNREACHABLE_EXIT; // Unknown singleton165}166ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);167}168169U_CDECL_BEGIN170171static void U_CALLCONV deleteNorm2AllModes(void *allModes) {172delete (Norm2AllModes *)allModes;173}174175static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {176#if !NORM2_HARDCODE_NFC_DATA177delete nfcSingleton;178nfcSingleton = nullptr;179nfcInitOnce.reset();180#endif181182delete nfkcSingleton;183nfkcSingleton = nullptr;184nfkcInitOnce.reset();185186delete nfkc_cfSingleton;187nfkc_cfSingleton = nullptr;188nfkc_cfInitOnce.reset();189190delete nfkc_scfSingleton;191nfkc_scfSingleton = nullptr;192nfkc_scfInitOnce.reset();193194uhash_close(cache);195cache=nullptr;196return true;197}198199U_CDECL_END200201#if !NORM2_HARDCODE_NFC_DATA202const Norm2AllModes *203Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {204if(U_FAILURE(errorCode)) { return nullptr; }205umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);206return nfcSingleton;207}208#endif209210const Norm2AllModes *211Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {212if(U_FAILURE(errorCode)) { return nullptr; }213umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);214return nfkcSingleton;215}216217const Norm2AllModes *218Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {219if(U_FAILURE(errorCode)) { return nullptr; }220umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);221return nfkc_cfSingleton;222}223224const Norm2AllModes *225Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {226if(U_FAILURE(errorCode)) { return nullptr; }227umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);228return nfkc_scfSingleton;229}230231#if !NORM2_HARDCODE_NFC_DATA232const Normalizer2 *233Normalizer2::getNFCInstance(UErrorCode &errorCode) {234const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);235return allModes!=nullptr ? &allModes->comp : nullptr;236}237238const Normalizer2 *239Normalizer2::getNFDInstance(UErrorCode &errorCode) {240const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);241return allModes!=nullptr ? &allModes->decomp : nullptr;242}243244const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {245const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);246return allModes!=nullptr ? &allModes->fcd : nullptr;247}248249const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {250const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);251return allModes!=nullptr ? &allModes->fcc : nullptr;252}253254const Normalizer2Impl *255Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {256const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);257return allModes!=nullptr ? allModes->impl : nullptr;258}259#endif260261const Normalizer2 *262Normalizer2::getNFKCInstance(UErrorCode &errorCode) {263const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);264return allModes!=nullptr ? &allModes->comp : nullptr;265}266267const Normalizer2 *268Normalizer2::getNFKDInstance(UErrorCode &errorCode) {269const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);270return allModes!=nullptr ? &allModes->decomp : nullptr;271}272273const Normalizer2 *274Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {275const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);276return allModes!=nullptr ? &allModes->comp : nullptr;277}278279const Normalizer2 *280Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {281const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);282return allModes!=nullptr ? &allModes->comp : nullptr;283}284285const Normalizer2 *286Normalizer2::getInstance(const char *packageName,287const char *name,288UNormalization2Mode mode,289UErrorCode &errorCode) {290if(U_FAILURE(errorCode)) {291return nullptr;292}293if(name==nullptr || *name==0) {294errorCode=U_ILLEGAL_ARGUMENT_ERROR;295return nullptr;296}297const Norm2AllModes *allModes=nullptr;298if(packageName==nullptr) {299if(0==uprv_strcmp(name, "nfc")) {300allModes=Norm2AllModes::getNFCInstance(errorCode);301} else if(0==uprv_strcmp(name, "nfkc")) {302allModes=Norm2AllModes::getNFKCInstance(errorCode);303} else if(0==uprv_strcmp(name, "nfkc_cf")) {304allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);305} else if(0==uprv_strcmp(name, "nfkc_scf")) {306allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);307}308}309if(allModes==nullptr && U_SUCCESS(errorCode)) {310{311Mutex lock;312if(cache!=nullptr) {313allModes = static_cast<Norm2AllModes*>(uhash_get(cache, name));314}315}316if(allModes==nullptr) {317ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);318LocalPointer<Norm2AllModes> localAllModes(319Norm2AllModes::createInstance(packageName, name, errorCode));320if(U_SUCCESS(errorCode)) {321Mutex lock;322if(cache==nullptr) {323cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);324if(U_FAILURE(errorCode)) {325return nullptr;326}327uhash_setKeyDeleter(cache, uprv_free);328uhash_setValueDeleter(cache, deleteNorm2AllModes);329}330void *temp=uhash_get(cache, name);331if(temp==nullptr) {332int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);333char* nameCopy = static_cast<char*>(uprv_malloc(keyLength));334if(nameCopy==nullptr) {335errorCode=U_MEMORY_ALLOCATION_ERROR;336return nullptr;337}338uprv_memcpy(nameCopy, name, keyLength);339allModes=localAllModes.getAlias();340uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);341} else {342// race condition343allModes = static_cast<Norm2AllModes*>(temp);344}345}346}347}348if(allModes!=nullptr && U_SUCCESS(errorCode)) {349switch(mode) {350case UNORM2_COMPOSE:351return &allModes->comp;352case UNORM2_DECOMPOSE:353return &allModes->decomp;354case UNORM2_FCD:355return &allModes->fcd;356case UNORM2_COMPOSE_CONTIGUOUS:357return &allModes->fcc;358default:359break; // do nothing360}361}362return nullptr;363}364365const Normalizer2 *366Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {367if(U_FAILURE(errorCode)) {368return nullptr;369}370switch(mode) {371case UNORM_NFD:372return Normalizer2::getNFDInstance(errorCode);373case UNORM_NFKD:374return Normalizer2::getNFKDInstance(errorCode);375case UNORM_NFC:376return Normalizer2::getNFCInstance(errorCode);377case UNORM_NFKC:378return Normalizer2::getNFKCInstance(errorCode);379case UNORM_FCD:380return getFCDInstance(errorCode);381default: // UNORM_NONE382return getNoopInstance(errorCode);383}384}385386const Normalizer2Impl *387Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {388const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);389return allModes!=nullptr ? allModes->impl : nullptr;390}391392const Normalizer2Impl *393Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {394const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);395return allModes!=nullptr ? allModes->impl : nullptr;396}397398U_NAMESPACE_END399400// C API ------------------------------------------------------------------- ***401402U_NAMESPACE_USE403404U_CAPI const UNormalizer2 * U_EXPORT2405unorm2_getNFKCInstance(UErrorCode *pErrorCode) {406return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);407}408409U_CAPI const UNormalizer2 * U_EXPORT2410unorm2_getNFKDInstance(UErrorCode *pErrorCode) {411return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);412}413414U_CAPI const UNormalizer2 * U_EXPORT2415unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {416return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);417}418419U_CAPI const UNormalizer2 * U_EXPORT2420unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {421return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);422}423424U_CAPI const UNormalizer2 * U_EXPORT2425unorm2_getInstance(const char *packageName,426const char *name,427UNormalization2Mode mode,428UErrorCode *pErrorCode) {429return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);430}431432U_CFUNC UNormalizationCheckResult433unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {434if(mode<=UNORM_NONE || UNORM_FCD<=mode) {435return UNORM_YES;436}437UErrorCode errorCode=U_ZERO_ERROR;438const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);439if(U_SUCCESS(errorCode)) {440return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);441} else {442return UNORM_MAYBE;443}444}445446#endif // !UCONFIG_NO_NORMALIZATION447448449