Path: blob/master/thirdparty/icu4c/i18n/scriptset.cpp
9906 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3**********************************************************************4* Copyright (C) 2014, International Business Machines5* Corporation and others. All Rights Reserved.6**********************************************************************7*8* scriptset.cpp9*10* created on: 2013 Jan 711* created by: Andy Heninger12*/1314#include "unicode/utypes.h"1516#include "unicode/uchar.h"17#include "unicode/unistr.h"1819#include "scriptset.h"20#include "uassert.h"21#include "cmemory.h"2223U_NAMESPACE_BEGIN2425//----------------------------------------------------------------------------26//27// ScriptSet implementation28//29//----------------------------------------------------------------------------30ScriptSet::ScriptSet() {31uprv_memset(bits, 0, sizeof(bits));32}3334ScriptSet::~ScriptSet() {35}3637ScriptSet::ScriptSet(const ScriptSet &other) {38*this = other;39}4041ScriptSet & ScriptSet::operator =(const ScriptSet &other) {42uprv_memcpy(bits, other.bits, sizeof(bits));43return *this;44}4546bool ScriptSet::operator == (const ScriptSet &other) const {47for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {48if (bits[i] != other.bits[i]) {49return false;50}51}52return true;53}5455UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {56if (U_FAILURE(status)) {57return false;58}59if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {60status = U_ILLEGAL_ARGUMENT_ERROR;61return false;62}63uint32_t index = script / 32;64uint32_t bit = 1 << (script & 31);65return ((bits[index] & bit) != 0);66}676869ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {70if (U_FAILURE(status)) {71return *this;72}73if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {74status = U_ILLEGAL_ARGUMENT_ERROR;75return *this;76}77uint32_t index = script / 32;78uint32_t bit = 1 << (script & 31);79bits[index] |= bit;80return *this;81}8283ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {84if (U_FAILURE(status)) {85return *this;86}87if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {88status = U_ILLEGAL_ARGUMENT_ERROR;89return *this;90}91uint32_t index = script / 32;92uint32_t bit = 1 << (script & 31);93bits[index] &= ~bit;94return *this;95}96979899ScriptSet &ScriptSet::Union(const ScriptSet &other) {100for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {101bits[i] |= other.bits[i];102}103return *this;104}105106ScriptSet &ScriptSet::intersect(const ScriptSet &other) {107for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {108bits[i] &= other.bits[i];109}110return *this;111}112113ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {114ScriptSet t;115t.set(script, status);116if (U_SUCCESS(status)) {117this->intersect(t);118}119return *this;120}121122UBool ScriptSet::intersects(const ScriptSet &other) const {123for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {124if ((bits[i] & other.bits[i]) != 0) {125return true;126}127}128return false;129}130131UBool ScriptSet::contains(const ScriptSet &other) const {132ScriptSet t(*this);133t.intersect(other);134return (t == other);135}136137138ScriptSet &ScriptSet::setAll() {139for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {140bits[i] = 0xffffffffu;141}142return *this;143}144145146ScriptSet &ScriptSet::resetAll() {147uprv_memset(bits, 0, sizeof(bits));148return *this;149}150151int32_t ScriptSet::countMembers() const {152// This bit counter is good for sparse numbers of '1's, which is153// very much the case that we will usually have.154int32_t count = 0;155for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {156uint32_t x = bits[i];157while (x > 0) {158count++;159x &= (x - 1); // and off the least significant one bit.160}161}162return count;163}164165int32_t ScriptSet::hashCode() const {166int32_t hash = 0;167for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {168hash ^= bits[i];169}170return hash;171}172173int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {174// TODO: Wants a better implementation.175if (fromIndex < 0) {176return -1;177}178UErrorCode status = U_ZERO_ERROR;179for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {180if (test(static_cast<UScriptCode>(scriptIndex), status)) {181return scriptIndex;182}183}184return -1;185}186187UBool ScriptSet::isEmpty() const {188for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {189if (bits[i] != 0) {190return false;191}192}193return true;194}195196UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {197UBool firstTime = true;198for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {199if (!firstTime) {200dest.append(static_cast<char16_t>(0x20));201}202firstTime = false;203const char* scriptName = uscript_getShortName(static_cast<UScriptCode>(i));204dest.append(UnicodeString(scriptName, -1, US_INV));205}206return dest;207}208209ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {210resetAll();211if (U_FAILURE(status)) {212return *this;213}214UnicodeString oneScriptName;215for (int32_t i=0; i<scriptString.length();) {216UChar32 c = scriptString.char32At(i);217i = scriptString.moveIndex32(i, 1);218if (!u_isUWhiteSpace(c)) {219oneScriptName.append(c);220if (i < scriptString.length()) {221continue;222}223}224if (oneScriptName.length() > 0) {225char buf[40];226oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);227buf[sizeof(buf)-1] = 0;228int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);229if (sc == UCHAR_INVALID_CODE) {230status = U_ILLEGAL_ARGUMENT_ERROR;231} else {232this->set(static_cast<UScriptCode>(sc), status);233}234if (U_FAILURE(status)) {235return *this;236}237oneScriptName.remove();238}239}240return *this;241}242243void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {244if (U_FAILURE(status)) { return; }245static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;246MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;247UErrorCode internalStatus = U_ZERO_ERROR;248int32_t script_count = -1;249250while (true) {251script_count = uscript_getScriptExtensions(252codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);253if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {254// Need to allocate more space255if (scripts.resize(script_count) == nullptr) {256status = U_MEMORY_ALLOCATION_ERROR;257return;258}259internalStatus = U_ZERO_ERROR;260} else {261break;262}263}264265// Check if we failed for some reason other than buffer overflow266if (U_FAILURE(internalStatus)) {267status = internalStatus;268return;269}270271// Load the scripts into the ScriptSet and return272for (int32_t i = 0; i < script_count; i++) {273this->set(scripts[i], status);274if (U_FAILURE(status)) { return; }275}276}277278U_NAMESPACE_END279280U_CAPI UBool U_EXPORT2281uhash_equalsScriptSet(const UElement key1, const UElement key2) {282icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);283icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);284return (*s1 == *s2);285}286287U_CAPI int32_t U_EXPORT2288uhash_compareScriptSet(UElement key0, UElement key1) {289icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);290icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);291int32_t diff = s0->countMembers() - s1->countMembers();292if (diff != 0) return diff;293int32_t i0 = s0->nextSetBit(0);294int32_t i1 = s1->nextSetBit(0);295while ((diff = i0-i1) == 0 && i0 > 0) {296i0 = s0->nextSetBit(i0+1);297i1 = s1->nextSetBit(i1+1);298}299return diff;300}301302U_CAPI int32_t U_EXPORT2303uhash_hashScriptSet(const UElement key) {304icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);305return s->hashCode();306}307308U_CAPI void U_EXPORT2309uhash_deleteScriptSet(void *obj) {310icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);311delete s;312}313314315