Path: blob/master/thirdparty/icu4c/i18n/scriptset.cpp
20791 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3**********************************************************************4* Copyright (C) 2014, International Business Machines5* Corporation and others. All Rights Reserved.6**********************************************************************7*8* scriptset.cpp9*10* created on: 2013 Jan 711* created by: Andy Heninger12*/1314#include "unicode/utypes.h"1516#include "unicode/uchar.h"17#include "unicode/unistr.h"1819#include "scriptset.h"20#include "uassert.h"21#include "cmemory.h"2223U_NAMESPACE_BEGIN2425//----------------------------------------------------------------------------26//27// ScriptSet implementation28//29//----------------------------------------------------------------------------30ScriptSet::ScriptSet() {31uprv_memset(bits, 0, sizeof(bits));32}3334ScriptSet::~ScriptSet() {35}3637ScriptSet::ScriptSet(const ScriptSet &other) {38*this = other;39}4041ScriptSet & ScriptSet::operator =(const ScriptSet &other) {42if (this != &other) {43uprv_memcpy(bits, other.bits, sizeof(bits));44}45return *this;46}4748bool ScriptSet::operator == (const ScriptSet &other) const {49for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {50if (bits[i] != other.bits[i]) {51return false;52}53}54return true;55}5657UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {58if (U_FAILURE(status)) {59return false;60}61if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {62status = U_ILLEGAL_ARGUMENT_ERROR;63return false;64}65uint32_t index = script / 32;66uint32_t bit = 1 << (script & 31);67return ((bits[index] & bit) != 0);68}697071ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {72if (U_FAILURE(status)) {73return *this;74}75if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {76status = U_ILLEGAL_ARGUMENT_ERROR;77return *this;78}79uint32_t index = script / 32;80uint32_t bit = 1 << (script & 31);81bits[index] |= bit;82return *this;83}8485ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {86if (U_FAILURE(status)) {87return *this;88}89if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {90status = U_ILLEGAL_ARGUMENT_ERROR;91return *this;92}93uint32_t index = script / 32;94uint32_t bit = 1 << (script & 31);95bits[index] &= ~bit;96return *this;97}9899100101ScriptSet &ScriptSet::Union(const ScriptSet &other) {102for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {103bits[i] |= other.bits[i];104}105return *this;106}107108ScriptSet &ScriptSet::intersect(const ScriptSet &other) {109for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {110bits[i] &= other.bits[i];111}112return *this;113}114115ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {116ScriptSet t;117t.set(script, status);118if (U_SUCCESS(status)) {119this->intersect(t);120}121return *this;122}123124UBool ScriptSet::intersects(const ScriptSet &other) const {125for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {126if ((bits[i] & other.bits[i]) != 0) {127return true;128}129}130return false;131}132133UBool ScriptSet::contains(const ScriptSet &other) const {134ScriptSet t(*this);135t.intersect(other);136return (t == other);137}138139140ScriptSet &ScriptSet::setAll() {141for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {142bits[i] = 0xffffffffu;143}144return *this;145}146147148ScriptSet &ScriptSet::resetAll() {149uprv_memset(bits, 0, sizeof(bits));150return *this;151}152153int32_t ScriptSet::countMembers() const {154// This bit counter is good for sparse numbers of '1's, which is155// very much the case that we will usually have.156int32_t count = 0;157for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {158uint32_t x = bits[i];159while (x > 0) {160count++;161x &= (x - 1); // and off the least significant one bit.162}163}164return count;165}166167int32_t ScriptSet::hashCode() const {168int32_t hash = 0;169for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {170hash ^= bits[i];171}172return hash;173}174175int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {176// TODO: Wants a better implementation.177if (fromIndex < 0) {178return -1;179}180UErrorCode status = U_ZERO_ERROR;181for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {182if (test(static_cast<UScriptCode>(scriptIndex), status)) {183return scriptIndex;184}185}186return -1;187}188189UBool ScriptSet::isEmpty() const {190for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {191if (bits[i] != 0) {192return false;193}194}195return true;196}197198UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {199UBool firstTime = true;200for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {201if (!firstTime) {202dest.append(static_cast<char16_t>(0x20));203}204firstTime = false;205const char* scriptName = uscript_getShortName(static_cast<UScriptCode>(i));206dest.append(UnicodeString(scriptName, -1, US_INV));207}208return dest;209}210211ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {212resetAll();213if (U_FAILURE(status)) {214return *this;215}216UnicodeString oneScriptName;217for (int32_t i=0; i<scriptString.length();) {218UChar32 c = scriptString.char32At(i);219i = scriptString.moveIndex32(i, 1);220if (!u_isUWhiteSpace(c)) {221oneScriptName.append(c);222if (i < scriptString.length()) {223continue;224}225}226if (oneScriptName.length() > 0) {227char buf[40];228oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);229buf[sizeof(buf)-1] = 0;230int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);231if (sc == UCHAR_INVALID_CODE) {232status = U_ILLEGAL_ARGUMENT_ERROR;233} else {234this->set(static_cast<UScriptCode>(sc), status);235}236if (U_FAILURE(status)) {237return *this;238}239oneScriptName.remove();240}241}242return *this;243}244245void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {246if (U_FAILURE(status)) { return; }247static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;248MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;249UErrorCode internalStatus = U_ZERO_ERROR;250int32_t script_count = -1;251252while (true) {253script_count = uscript_getScriptExtensions(254codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);255if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {256// Need to allocate more space257if (scripts.resize(script_count) == nullptr) {258status = U_MEMORY_ALLOCATION_ERROR;259return;260}261internalStatus = U_ZERO_ERROR;262} else {263break;264}265}266267// Check if we failed for some reason other than buffer overflow268if (U_FAILURE(internalStatus)) {269status = internalStatus;270return;271}272273// Load the scripts into the ScriptSet and return274for (int32_t i = 0; i < script_count; i++) {275this->set(scripts[i], status);276if (U_FAILURE(status)) { return; }277}278}279280U_NAMESPACE_END281282U_CAPI UBool U_EXPORT2283uhash_equalsScriptSet(const UElement key1, const UElement key2) {284icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);285icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);286return (*s1 == *s2);287}288289U_CAPI int32_t U_EXPORT2290uhash_compareScriptSet(UElement key0, UElement key1) {291icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);292icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);293int32_t diff = s0->countMembers() - s1->countMembers();294if (diff != 0) return diff;295int32_t i0 = s0->nextSetBit(0);296int32_t i1 = s1->nextSetBit(0);297while ((diff = i0-i1) == 0 && i0 > 0) {298i0 = s0->nextSetBit(i0+1);299i1 = s1->nextSetBit(i1+1);300}301return diff;302}303304U_CAPI int32_t U_EXPORT2305uhash_hashScriptSet(const UElement key) {306icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);307return s->hashCode();308}309310U_CAPI void U_EXPORT2311uhash_deleteScriptSet(void *obj) {312icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);313delete s;314}315316317