Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/icucommon/characterproperties.cpp
12343 views
1
// © 2018 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// characterproperties.cpp
5
// created: 2018sep03 Markus W. Scherer
6
7
#include "unicode/utypes.h"
8
#include "unicode/localpointer.h"
9
#include "unicode/uchar.h"
10
#include "unicode/ucpmap.h"
11
#include "unicode/ucptrie.h"
12
#include "unicode/umutablecptrie.h"
13
#include "unicode/uniset.h"
14
#include "unicode/uscript.h"
15
#include "unicode/uset.h"
16
#include "cmemory.h"
17
#include "emojiprops.h"
18
#include "mutex.h"
19
#include "normalizer2impl.h"
20
#include "uassert.h"
21
#include "ubidi_props.h"
22
#include "ucase.h"
23
#include "ucln_cmn.h"
24
#include "umutex.h"
25
#include "uprops.h"
26
27
using icu::LocalPointer;
28
#if !UCONFIG_NO_NORMALIZATION
29
using icu::Normalizer2Factory;
30
using icu::Normalizer2Impl;
31
#endif
32
using icu::UInitOnce;
33
using icu::UnicodeSet;
34
35
namespace {
36
37
UBool U_CALLCONV characterproperties_cleanup();
38
39
constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + (UCHAR_INT_LIMIT - UCHAR_INT_START);
40
41
struct Inclusion {
42
UnicodeSet *fSet = nullptr;
43
UInitOnce fInitOnce {};
44
};
45
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
46
47
UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
48
49
UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
50
51
icu::UMutex cpMutex;
52
53
//----------------------------------------------------------------
54
// Inclusions list
55
//----------------------------------------------------------------
56
57
// USetAdder implementation
58
// Does not use uset.h to reduce code dependencies
59
void U_CALLCONV
60
_set_add(USet *set, UChar32 c) {
61
((UnicodeSet *)set)->add(c);
62
}
63
64
void U_CALLCONV
65
_set_addRange(USet *set, UChar32 start, UChar32 end) {
66
((UnicodeSet *)set)->add(start, end);
67
}
68
69
void U_CALLCONV
70
_set_addString(USet *set, const UChar *str, int32_t length) {
71
((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
72
}
73
74
// Cleanup hook registered via ucln_common_registerCleanup().
// Deletes every cached inclusions set, binary-property set and integer-property map,
// nulls the cache slots, and resets the init-once flags of the inclusions.
// Always returns true.
UBool U_CALLCONV characterproperties_cleanup() {
    for (int32_t k = 0; k < NUM_INCLUSIONS; ++k) {
        Inclusion &slot = gInclusions[k];
        delete slot.fSet;
        slot.fSet = nullptr;
        slot.fInitOnce.reset();
    }
    for (UnicodeSet *&cachedSet : sets) {
        delete cachedSet;
        cachedSet = nullptr;
    }
    for (UCPMap *&cachedMap : maps) {
        // The maps are stored as UCPMap* but are closed as UCPTrie objects.
        ucptrie_close(reinterpret_cast<UCPTrie *>(cachedMap));
        cachedMap = nullptr;
    }
    return true;
}
90
91
// Builds the inclusions set for one property source and stores it in gInclusions[src].
//
// src:       the data source; UPROPS_SRC_NONE and unhandled values yield
//            U_INTERNAL_PROGRAM_ERROR.
// errorCode: receives U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated or
//            turns bogus, or whatever error the per-source collectors report.
//
// On any failure gInclusions[src].fSet is left untouched. On success the cleanup
// function is registered so the cached set gets released.
void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
    if (src == UPROPS_SRC_NONE) {
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        return;
    }
    U_ASSERT(gInclusions[src].fSet == nullptr);

    LocalPointer<UnicodeSet> starts(new UnicodeSet());
    if (starts.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Adapter that lets the collectors below write into `starts`.
    USetAdder adder = {
        reinterpret_cast<USet *>(starts.getAlias()),
        _set_add,
        _set_addRange,
        _set_addString,
        nullptr,  // don't need remove()
        nullptr   // don't need removeRange()
    };

    switch (src) {
    case UPROPS_SRC_CHAR:
        uchar_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_PROPSVEC:
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_CHAR_AND_PROPSVEC:
        uchar_addPropertyStarts(&adder, &errorCode);
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
#if !UCONFIG_NO_NORMALIZATION
    case UPROPS_SRC_CASE_AND_NORM: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addPropertyStarts(&adder, errorCode);
        }
        // Called regardless of the outcome above, with the same error code.
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    }
    case UPROPS_SRC_NFC: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFKC: {
        const Normalizer2Impl *nfkc = Normalizer2Factory::getNFKCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfkc->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFKC_CF: {
        const Normalizer2Impl *nfkcCf = Normalizer2Factory::getNFKC_CFImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfkcCf->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFC_CANON_ITER: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addCanonIterPropertyStarts(&adder, errorCode);
        }
        break;
    }
#endif
    case UPROPS_SRC_CASE:
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_BIDI:
        ubidi_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_INPC:
    case UPROPS_SRC_INSC:
    case UPROPS_SRC_VO:
        uprops_addPropertyStarts(src, &adder, &errorCode);
        break;
    case UPROPS_SRC_EMOJI: {
        const icu::EmojiProps *emoji = icu::EmojiProps::getSingleton(errorCode);
        if (U_SUCCESS(errorCode)) {
            emoji->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    default:
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        break;
    }

    if (U_FAILURE(errorCode)) {
        return;
    }
    if (starts->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Compact for caching.
    starts->compact();
    gInclusions[src].fSet = starts.orphan();
    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}
198
199
// Returns the cached inclusions set for a property source, building it on first use.
//
// Returns nullptr if errorCode already indicates failure on entry, or (with
// U_ILLEGAL_ARGUMENT_ERROR) if src lies outside [0, UPROPS_SRC_COUNT).
// Otherwise returns whatever gInclusions[src].fSet holds after the init-once call.
const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    const bool inRange = 0 <= src && src < UPROPS_SRC_COUNT;
    if (!inRange) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Inclusion &slot = gInclusions[src];
    umtx_initOnce(slot.fInitOnce, &initInclusion, src, errorCode);
    return slot.fSet;
}
209
210
// Builds the inclusions set for one integer property and stores it in the slot
// gInclusions[UPROPS_SRC_COUNT + (prop - UCHAR_INT_START)].
//
// Walks every code point of the source's inclusions set and records each code point
// whose property value differs from the previously seen value (starting from 0).
// On failure (source lookup error, allocation failure, bogus set) the slot is left
// untouched and errorCode describes the problem.
void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
    const int32_t slotIndex = UPROPS_SRC_COUNT + (prop - UCHAR_INT_START);
    U_ASSERT(gInclusions[slotIndex].fSet == nullptr);
    const UnicodeSet *sourceStarts = getInclusionsForSource(uprops_getSource(prop), errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }

    // The result set is constructed with the range (0, 0).
    LocalPointer<UnicodeSet> valueStarts(new UnicodeSet(0, 0));
    if (valueStarts.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    int32_t lastValue = 0;
    const int32_t rangeCount = sourceStarts->getRangeCount();
    for (int32_t r = 0; r < rangeCount; ++r) {
        const UChar32 first = sourceStarts->getRangeStart(r);
        const UChar32 last = sourceStarts->getRangeEnd(r);
        for (UChar32 c = first; c <= last; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            const int32_t value = u_getIntPropertyValue(c, prop);
            if (value == lastValue) {
                continue;
            }
            valueStarts->add(c);
            lastValue = value;
        }
    }

    if (valueStarts->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Compact for caching.
    valueStarts->compact();
    gInclusions[slotIndex].fSet = valueStarts.orphan();
    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}
249
250
} // namespace
251
252
U_NAMESPACE_BEGIN
253
254
// Returns the cached inclusions set to iterate over when enumerating `prop`.
//
// Integer properties (UCHAR_INT_START <= prop < UCHAR_INT_LIMIT) use their own
// per-property slot, built once by initIntPropInclusion(). Every other property
// falls back to the set of its data source.
// Returns nullptr if errorCode already indicates failure on entry.
const UnicodeSet *CharacterProperties::getInclusionsForProperty(
        UProperty prop, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    const bool isIntProperty = UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT;
    if (!isIntProperty) {
        return getInclusionsForSource(uprops_getSource(prop), errorCode);
    }
    Inclusion &slot = gInclusions[UPROPS_SRC_COUNT + (prop - UCHAR_INT_START)];
    umtx_initOnce(slot.fInitOnce, &initIntPropInclusion, prop, errorCode);
    return slot.fSet;
}
267
268
U_NAMESPACE_END
269
270
namespace {
271
272
// Builds a new, frozen UnicodeSet holding everything that has the given binary property.
//
// property:  the binary property; values in UCHAR_BASIC_EMOJI..UCHAR_RGI_EMOJI are
//            properties of strings and get their strings from EmojiProps.
// errorCode: in/out error code. Nothing is done if it already indicates failure.
//
// Returns a heap-allocated set owned by the caller, or nullptr on failure.
// A set is only ever returned together with a success code: a failure reported while
// adding strings, or a set that turned bogus, yields nullptr so that the caller
// cannot end up caching an incomplete set.
UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    LocalPointer<UnicodeSet> set(new UnicodeSet());
    if (set.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    // Code points are scanned for every property except the ones that consist
    // of strings only.
    bool scanCodePoints = true;
    if (UCHAR_BASIC_EMOJI <= property && property <= UCHAR_RGI_EMOJI) {
        // property of strings
        const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
        if (U_FAILURE(errorCode)) { return nullptr; }
        USetAdder sa = {
            reinterpret_cast<USet *>(set.getAlias()),
            _set_add,
            _set_addRange,
            _set_addString,
            nullptr, // don't need remove()
            nullptr // don't need removeRange()
        };
        ep->addStrings(&sa, property, errorCode);
        // Do not hand out (and let the caller cache) a partially filled set.
        if (U_FAILURE(errorCode)) { return nullptr; }
        // Only these two also contain single code points; the rest are
        // properties of _only_ strings.
        scanCodePoints = (property == UCHAR_BASIC_EMOJI || property == UCHAR_RGI_EMOJI);
    }

    if (scanCodePoints) {
        const UnicodeSet *inclusions =
            icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
        if (U_FAILURE(errorCode)) { return nullptr; }
        int32_t numRanges = inclusions->getRangeCount();
        UChar32 startHasProperty = -1;  // start of the current "true" run, or -1 if none

        for (int32_t i = 0; i < numRanges; ++i) {
            UChar32 rangeEnd = inclusions->getRangeEnd(i);
            for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
                // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
                if (u_hasBinaryProperty(c, property)) {
                    if (startHasProperty < 0) {
                        // Transition from false to true.
                        startHasProperty = c;
                    }
                } else if (startHasProperty >= 0) {
                    // Transition from true to false.
                    set->add(startHasProperty, c - 1);
                    startHasProperty = -1;
                }
            }
        }
        if (startHasProperty >= 0) {
            // The last run extends to the end of the code point range.
            set->add(startHasProperty, 0x10FFFF);
        }
    }

    // Same out-of-memory convention as the inclusion builders in this file.
    if (set->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    set->freeze();
    return set.orphan();
}
327
328
// Builds an immutable code point map for the given integer property.
//
// property:  the integer property to map.
// errorCode: in/out error code. Nothing is done if it already indicates failure.
//
// Returns the map built by umutablecptrie_buildImmutable(), reinterpreted as a
// UCPMap*, or nullptr if the inclusions lookup failed. Errors from the trie
// functions are reported through errorCode.
UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    // The trie is opened with nullValue for both of its value arguments, so runs
    // that carry nullValue never need an explicit setRange().
    uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
    icu::LocalUMutableCPTriePointer mutableTrie(
        umutablecptrie_open(nullValue, nullValue, &errorCode));
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    UChar32 start = 0;           // first code point of the current run
    uint32_t value = nullValue;  // value of the current run

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            uint32_t nextValue = u_getIntPropertyValue(c, property);
            if (value != nextValue) {
                if (value != nullValue) {
                    umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
                }
                start = c;
                value = nextValue;
            }
        }
    }
    // Flush the final run. Compare against nullValue, as in the loop above, rather
    // than against 0: when nullValue is nonzero (UCHAR_SCRIPT), a trailing run whose
    // value is 0 differs from the trie's opening value and must be written.
    if (value != nullValue) {
        umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
    }

    UCPTrieType type;
    if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
        type = UCPTRIE_TYPE_FAST;
    } else {
        type = UCPTRIE_TYPE_SMALL;
    }
    // Pick the narrowest value width that can hold the property's maximum value.
    UCPTrieValueWidth valueWidth;
    // TODO: UCharacterProperty.IntProperty
    int32_t max = u_getIntPropertyMaxValue(property);
    if (max <= 0xff) {
        valueWidth = UCPTRIE_VALUE_BITS_8;
    } else if (max <= 0xffff) {
        valueWidth = UCPTRIE_VALUE_BITS_16;
    } else {
        valueWidth = UCPTRIE_VALUE_BITS_32;
    }
    return reinterpret_cast<UCPMap *>(
        umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
}
377
378
} // namespace
379
380
U_NAMESPACE_USE
381
382
// Returns the cached set for a binary property, creating it with makeSet() on first use.
//
// Returns nullptr if *pErrorCode already indicates failure, if the property is outside
// [0, UCHAR_BINARY_LIMIT) (U_ILLEGAL_ARGUMENT_ERROR), or if building the set failed.
// The cache lookup and fill happen while cpMutex is held.
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    const bool isBinaryProperty = 0 <= property && property < UCHAR_BINARY_LIMIT;
    if (!isBinaryProperty) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UnicodeSet *&cached = sets[property];
    if (cached == nullptr) {
        cached = makeSet(property, *pErrorCode);
    }
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    return cached->toUSet();
}
397
398
// Returns the cached map for an integer property, creating it with makeMap() on first use.
//
// Returns nullptr if *pErrorCode already indicates failure or if the property is outside
// [UCHAR_INT_START, UCHAR_INT_LIMIT) (U_ILLEGAL_ARGUMENT_ERROR). Otherwise returns the
// cache entry, which is whatever makeMap() produced.
// The cache lookup and fill happen while cpMutex is held.
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    const bool isIntProperty = UCHAR_INT_START <= property && property < UCHAR_INT_LIMIT;
    if (!isIntProperty) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UCPMap *&cached = maps[property - UCHAR_INT_START];
    if (cached == nullptr) {
        cached = makeMap(property, *pErrorCode);
    }
    return cached;
}
412
413