Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/icu4c/i18n/uspoof.cpp
9912 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
***************************************************************************
5
* Copyright (C) 2008-2015, International Business Machines Corporation
6
* and others. All Rights Reserved.
7
***************************************************************************
8
* file name: uspoof.cpp
9
* encoding: UTF-8
10
* tab size: 8 (not used)
11
* indentation:4
12
*
13
* created on: 2008Feb13
14
* created by: Andy Heninger
15
*
16
* Unicode Spoof Detection
17
*/
18
#include "unicode/ubidi.h"
19
#include "unicode/utypes.h"
20
#include "unicode/normalizer2.h"
21
#include "unicode/uspoof.h"
22
#include "unicode/ustring.h"
23
#include "unicode/utf16.h"
24
#include "cmemory.h"
25
#include "cstring.h"
26
#include "mutex.h"
27
#include "scriptset.h"
28
#include "uassert.h"
29
#include "ucln_in.h"
30
#include "uspoof_impl.h"
31
#include "umutex.h"
32
33
34
#if !UCONFIG_NO_NORMALIZATION
35
36
U_NAMESPACE_USE
37
38
39
//
40
// Static Objects used by the spoof impl, their thread safe initialization and their cleanup.
41
//
42
static UnicodeSet *gInclusionSet = nullptr;
43
static UnicodeSet *gRecommendedSet = nullptr;
44
static const Normalizer2 *gNfdNormalizer = nullptr;
45
static UInitOnce gSpoofInitStaticsOnce {};
46
47
namespace {
48
49
// Cleanup hook registered with ucln_i18n_registerCleanup().
// Deletes both static UnicodeSets, clears the cached normalizer pointer
// (which is not deleted here) and resets the init-once guard.
// Always returns true.
UBool U_CALLCONV
uspoof_cleanup() {
    UnicodeSet *const inclusion = gInclusionSet;
    UnicodeSet *const recommended = gRecommendedSet;

    // Null the globals first, then release the objects they pointed at.
    gInclusionSet = nullptr;
    gRecommendedSet = nullptr;
    gNfdNormalizer = nullptr;

    delete inclusion;
    delete recommended;

    gSpoofInitStaticsOnce.reset();
    return true;
}
59
60
// One-time construction of the static sets and the NFD normalizer pointer.
// Invoked through umtx_initOnce(gSpoofInitStaticsOnce, ...).
//
// On allocation failure, or if either property lookup fails, both sets are
// deleted, the globals are left null and `status` carries the error.
// The cleanup hook is registered only when the sets were built successfully.
void U_CALLCONV initializeStatics(UErrorCode &status) {
    // Shared failure path: release whatever was allocated and null the globals.
    const auto discardSets = []() {
        delete gInclusionSet;
        gInclusionSet = nullptr;
        delete gRecommendedSet;
        gRecommendedSet = nullptr;
    };

    gInclusionSet = new UnicodeSet();
    gRecommendedSet = new UnicodeSet();
    if (gInclusionSet == nullptr || gRecommendedSet == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        discardSets();
        return;
    }

    gInclusionSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_INCLUSION, status);
    gRecommendedSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_RECOMMENDED, status);
    if (U_FAILURE(status)) {
        discardSets();
        return;
    }

    // The sets are never modified after this point.
    gInclusionSet->freeze();
    gRecommendedSet->freeze();

    gNfdNormalizer = Normalizer2::getNFDInstance(status);
    ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup);
}
85
86
} // namespace
87
88
// Internal entry point that triggers the one-time static initialization.
// Any initialization error is reported through *status.
U_CFUNC void uspoof_internalInitStatics(UErrorCode *status) {
    UErrorCode &errorCode = *status;
    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, errorCode);
}
91
92
// Creates a new spoof checker.
// Runs the one-time static initialization first; returns nullptr with *status
// set if that fails, if allocation fails, or if the SpoofImpl constructor
// reports an error.
U_CAPI USpoofChecker * U_EXPORT2
uspoof_open(UErrorCode *status) {
    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    SpoofImpl *impl = new SpoofImpl(*status);
    if (impl == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    if (U_SUCCESS(*status)) {
        return impl->asUSpoofChecker();
    }

    // Construction failed: discard the partially set-up object.
    delete impl;
    return nullptr;
}
109
110
111
// Creates a spoof checker from serialized data.
//
//   data          - serialized spoof data; must not be nullptr.
//   length        - passed through to the SpoofData constructor.
//   pActualLength - optional out-parameter; receives SpoofData::size() on success.
//   status        - in/out error code.
//
// Returns the new checker, or nullptr with *status set on any failure.
U_CAPI USpoofChecker * U_EXPORT2
uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
                          UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return nullptr;
    }
    if (data == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    SpoofData *spoofData = new SpoofData(data, length, *status);
    if (spoofData == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(*status)) {
        delete spoofData;
        return nullptr;
    }

    SpoofImpl *impl = new SpoofImpl(spoofData, *status);
    if (impl == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        // No SpoofImpl exists to take ownership, so the data is freed here.
        delete spoofData;
        return nullptr;
    }
    if (U_FAILURE(*status)) {
        // The SpoofImpl destructor releases spoofData; do not delete it separately.
        delete impl;
        return nullptr;
    }

    if (pActualLength != nullptr) {
        *pActualLength = spoofData->size();
    }
    return impl->asUSpoofChecker();
}
157
158
159
// Creates a copy of an existing spoof checker via the SpoofImpl copy constructor.
//
//   sc     - the checker to copy.
//   status - in/out error code.
//
// Returns the new checker, or nullptr with *status set when `sc` does not
// validate, allocation fails, or the copy constructor reports an error.
U_CAPI USpoofChecker * U_EXPORT2
uspoof_clone(const USpoofChecker *sc, UErrorCode *status) {
    const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
    if (src == nullptr) {
        return nullptr;
    }
    SpoofImpl *result = new SpoofImpl(*src, *status);   // copy constructor
    if (result == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(*status)) {
        delete result;
        // Return here rather than falling through: calling a member function
        // through a null pointer is undefined behavior.
        return nullptr;
    }
    return result->asUSpoofChecker();
}
176
177
178
// Destroys a spoof checker. The handle is validated with a local status
// (errors are not reported to the caller) and the resulting pointer is deleted.
U_CAPI void U_EXPORT2
uspoof_close(USpoofChecker *sc) {
    UErrorCode localStatus = U_ZERO_ERROR;
    SpoofImpl *impl = SpoofImpl::validateThis(sc, localStatus);
    delete impl;
}
184
185
186
// Sets the bitmask of checks the checker performs.
// Any bit outside USPOOF_ALL_CHECKS | USPOOF_AUX_INFO is rejected with
// U_ILLEGAL_ARGUMENT_ERROR and the checker is left unchanged.
U_CAPI void U_EXPORT2
uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
    SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    if (impl == nullptr) {
        return;
    }

    // Only known check bits are acceptable.
    const int32_t knownBits = USPOOF_ALL_CHECKS | USPOOF_AUX_INFO;
    if ((checks & ~knownBits) != 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    impl->fChecks = checks;
}
202
203
204
// Returns the checker's current check bitmask, or 0 if `sc` does not validate.
U_CAPI int32_t U_EXPORT2
uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) {
    const SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    return (impl != nullptr) ? impl->fChecks : 0;
}
212
213
// Stores the restriction level and turns on the USPOOF_RESTRICTION_LEVEL check.
// This API has no status parameter; an invalid `sc` is silently ignored.
U_CAPI void U_EXPORT2
uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
    UErrorCode localStatus = U_ZERO_ERROR;
    SpoofImpl *impl = SpoofImpl::validateThis(sc, localStatus);
    if (impl == nullptr) {
        return;
    }
    impl->fRestrictionLevel = restrictionLevel;
    impl->fChecks |= USPOOF_RESTRICTION_LEVEL;
}
222
223
// Returns the checker's restriction level.
// This API has no status parameter; an invalid `sc` yields USPOOF_UNRESTRICTIVE.
U_CAPI URestrictionLevel U_EXPORT2
uspoof_getRestrictionLevel(const USpoofChecker *sc) {
    UErrorCode localStatus = U_ZERO_ERROR;
    const SpoofImpl *impl = SpoofImpl::validateThis(sc, localStatus);
    if (impl != nullptr) {
        return impl->fRestrictionLevel;
    }
    return USPOOF_UNRESTRICTIVE;
}
232
233
// Forwards the locale list to SpoofImpl::setAllowedLocales().
// Does nothing if `sc` does not validate.
U_CAPI void U_EXPORT2
uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) {
    SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    if (impl != nullptr) {
        impl->setAllowedLocales(localesList, *status);
    }
}
241
242
// Returns the result of SpoofImpl::getAllowedLocales(), or nullptr if `sc`
// does not validate.
U_CAPI const char * U_EXPORT2
uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) {
    SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    if (impl != nullptr) {
        return impl->getAllowedLocales(*status);
    }
    return nullptr;
}
250
251
252
// C-API view of the checker's allowed-character set.
// Returns nullptr when uspoof_getAllowedUnicodeSet() returns nullptr
// (for example because `sc` failed validation), instead of calling a member
// function through the null pointer.
U_CAPI const USet * U_EXPORT2
uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) {
    const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status);
    if (result == nullptr) {
        return nullptr;
    }
    return result->toUSet();
}
257
258
// Returns the checker's allowed-character set (still owned by the checker),
// or nullptr if `sc` does not validate.
U_CAPI const UnicodeSet * U_EXPORT2
uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) {
    const SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    return (impl != nullptr) ? impl->fAllowedCharsSet : nullptr;
}
266
267
268
// C-API wrapper: converts the USet to a UnicodeSet and delegates to
// uspoof_setAllowedUnicodeSet().
U_CAPI void U_EXPORT2
uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
    uspoof_setAllowedUnicodeSet(sc, UnicodeSet::fromUSet(chars), status);
}
273
274
275
// Replaces the checker's allowed-character set with a frozen clone of `chars`
// and turns on the USPOOF_CHAR_LIMIT check.
//
//   sc     - the checker to modify.
//   chars  - the set to copy; a null or bogus set is rejected with
//            U_ILLEGAL_ARGUMENT_ERROR.
//   status - in/out error code; U_MEMORY_ALLOCATION_ERROR if cloning fails.
//
// On any error the checker is left unchanged.
U_CAPI void U_EXPORT2
uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == nullptr) {
        return;
    }
    if (chars == nullptr || chars->isBogus()) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeSet *clonedSet = chars->clone();
    if (clonedSet == nullptr || clonedSet->isBogus()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        // A bogus clone is still a live object; free it so it is not leaked.
        delete clonedSet;
        return;
    }
    clonedSet->freeze();
    delete This->fAllowedCharsSet;
    This->fAllowedCharsSet = clonedSet;
    This->fChecks |= USPOOF_CHAR_LIMIT;
}
295
296
297
// Legacy UTF-16 check entry point; the work is done by uspoof_check2().
// `position`, when supplied, is always set to 0 (kept for backwards
// compatibility).
U_CAPI int32_t U_EXPORT2
uspoof_check(const USpoofChecker *sc,
             const char16_t *id, int32_t length,
             int32_t *position,
             UErrorCode *status) {
    if (position != nullptr) {
        *position = 0;
    }
    return uspoof_check2(sc, id, length, /*checkResult*/ nullptr, status);
}
311
312
313
// Checks a UTF-16 identifier.
//
//   id, length  - the text; length == -1 means NUL-terminated, and values
//                 below -1 are rejected with U_ILLEGAL_ARGUMENT_ERROR.
//   checkResult - optional object that receives detailed results.
//
// Returns the value produced by uspoof_check2UnicodeString(), or 0 on
// validation/argument errors.
U_CAPI int32_t U_EXPORT2
uspoof_check2(const USpoofChecker *sc,
              const char16_t* id, int32_t length,
              USpoofCheckResult* checkResult,
              UErrorCode *status) {
    if (SpoofImpl::validateThis(sc, *status) == nullptr) {
        return 0;
    }
    if (length < -1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Read-only alias over the caller's buffer; no copy is made here.
    UnicodeString idStr((length == -1), id, length);
    return uspoof_check2UnicodeString(sc, idStr, checkResult, status);
}
331
332
333
// Legacy UTF-8 check entry point; the work is done by uspoof_check2UTF8().
// `position`, when supplied, is always set to 0 (kept for backwards
// compatibility).
U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker *sc,
                 const char *id, int32_t length,
                 int32_t *position,
                 UErrorCode *status) {
    if (position != nullptr) {
        *position = 0;
    }
    return uspoof_check2UTF8(sc, id, length, /*checkResult*/ nullptr, status);
}
347
348
349
// Checks a UTF-8 identifier by converting it to a UnicodeString and delegating
// to uspoof_check2UnicodeString().
// A negative `length` means the input is measured with uprv_strlen().
// Returns 0 immediately if *status already indicates failure.
U_CAPI int32_t U_EXPORT2
uspoof_check2UTF8(const USpoofChecker *sc,
                  const char *id, int32_t length,
                  USpoofCheckResult* checkResult,
                  UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }
    const int32_t byteLength =
        (length >= 0) ? length : static_cast<int32_t>(uprv_strlen(id));
    UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, byteLength));
    return uspoof_check2UnicodeString(sc, idStr, checkResult, status);
}
362
363
364
// UTF-16 front end for uspoof_areConfusableUnicodeString().
// A length of -1 means NUL-terminated; lengths below -1 are rejected with
// U_ILLEGAL_ARGUMENT_ERROR. Returns 0 on any error.
U_CAPI int32_t U_EXPORT2
uspoof_areConfusable(const USpoofChecker *sc,
                     const char16_t *id1, int32_t length1,
                     const char16_t *id2, int32_t length2,
                     UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    const bool badLength = (length1 < -1) || (length2 < -1);
    if (badLength) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Read-only aliases over the caller's buffers.
    UnicodeString first((length1 == -1), id1, length1);
    UnicodeString second((length2 == -1), id2, length2);
    return uspoof_areConfusableUnicodeString(sc, first, second, status);
}
382
383
384
// UTF-8 front end for uspoof_areConfusableUnicodeString().
// A length of -1 means the input is measured with uprv_strlen(); lengths
// below -1 are rejected with U_ILLEGAL_ARGUMENT_ERROR. Returns 0 on any error.
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUTF8(const USpoofChecker *sc,
                         const char *id1, int32_t length1,
                         const char *id2, int32_t length2,
                         UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    const bool badLength = (length1 < -1) || (length2 < -1);
    if (badLength) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t bytes1 = (length1 >= 0) ? length1 : static_cast<int32_t>(uprv_strlen(id1));
    UnicodeString first = UnicodeString::fromUTF8(StringPiece(id1, bytes1));
    const int32_t bytes2 = (length2 >= 0) ? length2 : static_cast<int32_t>(uprv_strlen(id2));
    UnicodeString second = UnicodeString::fromUTF8(StringPiece(id2, bytes2));

    return uspoof_areConfusableUnicodeString(sc, first, second, status);
}
402
403
404
// Determines whether two identifiers are confusable.
//
//   sc       - the checker; USPOOF_CONFUSABLE must be enabled in its checks,
//              otherwise U_INVALID_STATE_ERROR is set.
//   id1, id2 - the identifiers to compare.
//   status   - in/out error code.
//
// Returns 0 if the skeletons differ or on any error. Otherwise returns the
// combination of USPOOF_SINGLE_SCRIPT_CONFUSABLE, USPOOF_MIXED_SCRIPT_CONFUSABLE
// and USPOOF_WHOLE_SCRIPT_CONFUSABLE that applies, limited to the flags enabled
// in the checker.
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
                                  const icu::UnicodeString &id1,
                                  const icu::UnicodeString &id2,
                                  UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    //
    // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
    //   and for definitions of the types (single, whole, mixed-script) of confusables.

    // We only care about a few of the check flags.  Ignore the others.
    // If no tests relevant to this function have been specified, return an error.
    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
    //        but logically we would just return 0 (no error).
    if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
        *status = U_INVALID_STATE_ERROR;
        return 0;
    }

    // Compute the skeletons and check for confusability.
    UnicodeString id1Skeleton;
    uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
    UnicodeString id2Skeleton;
    uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status);
    if (U_FAILURE(*status)) { return 0; }
    if (id1Skeleton != id2Skeleton) {
        return 0;
    }

    // If we get here, the strings are confusable.  Now we just need to set the flags for the
    // appropriate classes of confusables according to UTS 39 section 4.
    // Start by computing the resolved script sets of id1 and id2.
    ScriptSet id1RSS;
    This->getResolvedScriptSet(id1, id1RSS, *status);
    ScriptSet id2RSS;
    This->getResolvedScriptSet(id2, id2RSS, *status);
    // Do not classify from script sets whose computation reported an error.
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Turn on all applicable flags
    int32_t result = 0;
    if (id1RSS.intersects(id2RSS)) {
        result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    } else {
        result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
        if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
            result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
        }
    }

    // Turn off flags that the user doesn't want. `result` holds only the three
    // confusable bits, so masking with fChecks clears exactly the disabled ones.
    return result & This->fChecks;
}
468
469
// UTF-16 front end for uspoof_areBidiConfusableUnicodeString().
// A length of -1 means NUL-terminated. If either aliased string ends up bogus,
// U_ILLEGAL_ARGUMENT_ERROR is set and 0 is returned.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
                                                  const char16_t *id1, int32_t length1,
                                                  const char16_t *id2, int32_t length2,
                                                  UErrorCode *status) {
    // Read-only aliases over the caller's buffers.
    UnicodeString first((length1 == -1), id1, length1);
    UnicodeString second((length2 == -1), id2, length2);

    const bool anyBogus = first.isBogus() || second.isBogus();
    if (anyBogus) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    return uspoof_areBidiConfusableUnicodeString(sc, direction, first, second, status);
}
481
482
// UTF-8 front end for uspoof_areBidiConfusableUnicodeString().
// A length of -1 means the input is measured with uprv_strlen(); lengths
// below -1 are rejected with U_ILLEGAL_ARGUMENT_ERROR.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
                                                      const char *id1, int32_t length1, const char *id2,
                                                      int32_t length2, UErrorCode *status) {
    const bool badLength = (length1 < -1) || (length2 < -1);
    if (badLength) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t bytes1 = (length1 >= 0) ? length1 : static_cast<int32_t>(uprv_strlen(id1));
    UnicodeString first = UnicodeString::fromUTF8(StringPiece(id1, bytes1));
    const int32_t bytes2 = (length2 >= 0) ? length2 : static_cast<int32_t>(uprv_strlen(id2));
    UnicodeString second = UnicodeString::fromUTF8(StringPiece(id2, bytes2));

    return uspoof_areBidiConfusableUnicodeString(sc, direction, first, second, status);
}
495
496
// Determines whether two identifiers are confusable when displayed in the
// given paragraph direction, by comparing their bidi skeletons.
//
//   sc        - the checker; USPOOF_CONFUSABLE must be enabled in its checks,
//               otherwise U_INVALID_STATE_ERROR is set.
//   direction - passed through to uspoof_getBidiSkeletonUnicodeString().
//   id1, id2  - the identifiers to compare.
//   status    - in/out error code.
//
// Returns 0 if the bidi skeletons differ or on any error. Otherwise returns the
// applicable USPOOF_*_SCRIPT_CONFUSABLE flags, masked by the checker's enabled
// checks.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
                                                               UBiDiDirection direction,
                                                               const icu::UnicodeString &id1,
                                                               const icu::UnicodeString &id2,
                                                               UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    //
    // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
    //   and for definitions of the types (single, whole, mixed-script) of confusables.

    // We only care about a few of the check flags.  Ignore the others.
    // If no tests relevant to this function have been specified, return an error.
    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
    //        but logically we would just return 0 (no error).
    if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
        *status = U_INVALID_STATE_ERROR;
        return 0;
    }

    // Compute the skeletons and check for confusability.
    UnicodeString id1Skeleton;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status);
    UnicodeString id2Skeleton;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    if (id1Skeleton != id2Skeleton) {
        return 0;
    }

    // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate
    // classes of confusables according to UTS 39 section 4.  Start by computing the resolved script sets
    // of id1 and id2.
    ScriptSet id1RSS;
    This->getResolvedScriptSet(id1, id1RSS, *status);
    ScriptSet id2RSS;
    This->getResolvedScriptSet(id2, id2RSS, *status);
    // Do not classify from script sets whose computation reported an error.
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Turn on all applicable flags
    uint32_t result = 0;
    if (id1RSS.intersects(id2RSS)) {
        result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    } else {
        result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
        if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
            result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
        }
    }

    // Turn off flags that the user doesn't want
    return result & This->fChecks;
}
552
553
554
// Legacy UnicodeString check entry point; the work is done by
// uspoof_check2UnicodeString(). `position`, when supplied, is always set to 0
// (kept for backwards compatibility).
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id,
                          int32_t *position,
                          UErrorCode *status) {
    if (position != nullptr) {
        *position = 0;
    }
    return uspoof_check2UnicodeString(sc, id, /*checkResult*/ nullptr, status);
}
568
569
namespace {
570
571
// Runs every check enabled in This->fChecks against `id`.
//
//   This        - validated checker; must not be null.
//   id          - the identifier to examine.
//   checkResult - receives the failed-check bits, plus the restriction level
//                 and numerics set when those checks are enabled; must not be
//                 null. It is cleared before any check runs.
//   status      - in/out error code handed to the individual checks.
//
// Returns checkResult->toCombinedBitmask(This->fChecks).
int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) {
    U_ASSERT(This != nullptr);
    U_ASSERT(checkResult != nullptr);
    checkResult->clear();
    int32_t failedChecks = 0;

    // Restriction level: fails when the identifier's level exceeds the configured one.
    if ((This->fChecks & USPOOF_RESTRICTION_LEVEL) != 0) {
        const URestrictionLevel idLevel = This->getRestrictionLevel(id, *status);
        if (idLevel > This->fRestrictionLevel) {
            failedChecks |= USPOOF_RESTRICTION_LEVEL;
        }
        checkResult->fRestrictionLevel = idLevel;
    }

    // Mixed numbers: fails when getNumerics() yields more than one entry.
    if ((This->fChecks & USPOOF_MIXED_NUMBERS) != 0) {
        UnicodeSet numerics;
        This->getNumerics(id, numerics, *status);
        if (numerics.size() > 1) {
            failedChecks |= USPOOF_MIXED_NUMBERS;
        }
        checkResult->fNumerics = numerics;  // UnicodeSet::operator=
    }

    // Hidden overlay: fails when findHiddenOverlay() reports an index.
    if ((This->fChecks & USPOOF_HIDDEN_OVERLAY) != 0) {
        if (This->findHiddenOverlay(id, *status) != -1) {
            failedChecks |= USPOOF_HIDDEN_OVERLAY;
        }
    }

    // Character limit: fails on the first code point outside the allowed set.
    if ((This->fChecks & USPOOF_CHAR_LIMIT) != 0) {
        const int32_t idLength = id.length();
        int32_t pos = 0;
        while (pos < idLength) {
            const UChar32 cp = id.char32At(pos);
            if (!This->fAllowedCharsSet->contains(cp)) {
                failedChecks |= USPOOF_CHAR_LIMIT;
                break;
            }
            pos += U16_LENGTH(cp);
        }
    }

    // Invisible: this check needs to be done on NFD input.
    if ((This->fChecks & USPOOF_INVISIBLE) != 0) {
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        const int32_t nfdLength = nfdText.length();

        // Scan for more than one occurrence of the same non-spacing mark
        // within a single run of non-spacing marks.
        UChar32 firstMarkInRun = 0;
        UBool runHasSeveralMarks = false;
        UnicodeSet marksInRun;  // combining marks seen in the current run

        for (int32_t pos = 0; pos < nfdLength; ) {
            const UChar32 cp = nfdText.char32At(pos);
            pos += U16_LENGTH(cp);

            if (u_charType(cp) != U_NON_SPACING_MARK) {
                // The run (if any) ends here; reset the per-run state.
                firstMarkInRun = 0;
                if (runHasSeveralMarks) {
                    marksInRun.clear();
                    runHasSeveralMarks = false;
                }
                continue;
            }
            if (firstMarkInRun == 0) {
                // First mark of a run: remember it, the set is only filled once a second mark shows up.
                firstMarkInRun = cp;
                continue;
            }
            if (!runHasSeveralMarks) {
                marksInRun.add(firstMarkInRun);
                runHasSeveralMarks = true;
            }
            if (marksInRun.contains(cp)) {
                // Report the error and stop scanning;
                // there is no need to find more than the first failure.
                failedChecks |= USPOOF_INVISIBLE;
                break;
            }
            marksInRun.add(cp);
        }
    }

    checkResult->fChecks = failedChecks;
    return checkResult->toCombinedBitmask(This->fChecks);
}
662
663
} // namespace
664
665
// Checks a UnicodeString identifier.
//
//   checkResult - optional; when null, a temporary CheckResult on the stack is
//                 used and discarded, otherwise the handle is validated and
//                 filled in.
//
// Returns the value from checkImpl(), or 0 if `sc` or `checkResult` does not
// validate.
U_CAPI int32_t U_EXPORT2
uspoof_check2UnicodeString(const USpoofChecker *sc,
                           const icu::UnicodeString &id,
                           USpoofCheckResult* checkResult,
                           UErrorCode *status) {
    const SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    if (impl == nullptr) {
        return 0;
    }

    if (checkResult == nullptr) {
        // The caller does not want the details, so a local object suffices.
        CheckResult scratchResult;
        return checkImpl(impl, id, &scratchResult, status);
    }

    CheckResult* validatedResult = CheckResult::validateThis(checkResult, *status);
    if (validatedResult == nullptr) {
        return 0;
    }
    return checkImpl(impl, id, validatedResult, status);
}
687
688
689
// Computes the skeleton of a UTF-16 identifier into a caller-supplied buffer.
//
//   type               - forwarded to uspoof_getSkeletonUnicodeString().
//   id, length         - input text; length == -1 means NUL-terminated.
//   dest, destCapacity - output buffer and its capacity.
//
// Rejects length < -1, destCapacity < 0, and a non-null dest with zero
// capacity (U_ILLEGAL_ARGUMENT_ERROR). Returns the length of the skeleton
// string, or 0 on validation/argument errors.
U_CAPI int32_t U_EXPORT2
uspoof_getSkeleton(const USpoofChecker *sc,
                   uint32_t type,
                   const char16_t *id,  int32_t length,
                   char16_t *dest, int32_t destCapacity,
                   UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    const bool badArguments =
        (length < -1) || (destCapacity < 0) || (dest != nullptr && destCapacity == 0);
    if (badArguments) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString source((length == -1), id, length);  // read-only alias of the input
    UnicodeString skeleton;
    uspoof_getSkeletonUnicodeString(sc, type, source, skeleton, status);
    skeleton.extract(dest, destCapacity, *status);
    return skeleton.length();
}
711
712
// Computes the bidi skeleton of a UTF-16 identifier into a caller-supplied
// buffer. A length of -1 means NUL-terminated. If the aliased input string is
// bogus, U_ILLEGAL_ARGUMENT_ERROR is set and 0 is returned; otherwise the
// value of UnicodeString::extract() is returned.
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction,
                                               const UChar *id, int32_t length, UChar *dest,
                                               int32_t destCapacity, UErrorCode *status) {
    UnicodeString source((length == -1), id, length);  // read-only alias of the input
    if (source.isBogus()) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString skeleton;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, source, skeleton, status);
    return skeleton.extract(dest, destCapacity, *status);
}
724
725
726
727
// Computes the bidi skeleton of `id`: the text is reordered with the bidi
// algorithm for the given paragraph direction, then passed to
// uspoof_getSkeletonUnicodeString().
//
//   sc        - the checker.
//   direction - must be UBIDI_LTR or UBIDI_RTL, otherwise
//               U_ILLEGAL_ARGUMENT_ERROR is set.
//   id        - the input identifier.
//   dest      - receives the skeleton; it is emptied first in all cases.
//   status    - in/out error code. An error already present on entry is left
//               untouched. U_MEMORY_ALLOCATION_ERROR is set if the UBiDi
//               object or the reordering buffer cannot be allocated.
//
// Returns `dest`.
U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc,
                                                                        UBiDiDirection direction,
                                                                        const UnicodeString &id,
                                                                        UnicodeString &dest,
                                                                        UErrorCode *status) {
    dest.remove();
    if (U_FAILURE(*status)) {
        // Keep the caller's original error rather than overwriting it below.
        return dest;
    }
    if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return dest;
    }
    UBiDi *bidi = ubidi_open();
    if (bidi == nullptr) {
        // Report the allocation failure here; passing a null UBiDi on to
        // ubidi_setPara() would surface it as an argument error instead.
        *status = U_MEMORY_ALLOCATION_ERROR;
        return dest;
    }
    ubidi_setPara(bidi, id.getBuffer(), id.length(), direction,
                  /*embeddingLevels*/ nullptr, status);
    if (U_FAILURE(*status)) {
        ubidi_close(bidi);
        return dest;
    }
    UnicodeString reordered;
    int32_t const size = ubidi_getProcessedLength(bidi);
    UChar* const reorderedBuffer = reordered.getBuffer(size);
    if (reorderedBuffer == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        ubidi_close(bidi);
        return dest;
    }
    ubidi_writeReordered(bidi, reorderedBuffer, size,
                         UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status);
    reordered.releaseBuffer(size);
    ubidi_close(bidi);

    if (U_FAILURE(*status)) {
        return dest;
    }

    // The type parameter is deprecated since ICU 58; any number may be passed.
    constexpr uint32_t deprecatedType = 58;
    return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
}
765
766
767
768
// Computes the skeleton of `id` into `dest`.
//
// Steps: NFD-normalize the input; for each code point that is not
// default-ignorable, append its confusable mapping to an intermediate string;
// NFD-normalize that string into `dest`. The second parameter (type) is unused.
//
// If validation fails or *status already indicates failure, `dest` is returned
// unmodified.
U_I18N_API UnicodeString &  U_EXPORT2
uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
                                uint32_t /*type*/,
                                const UnicodeString &id,
                                UnicodeString &dest,
                                UErrorCode *status) {
    const SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return dest;
    }

    UnicodeString nfdInput;
    gNfdNormalizer->normalize(id, nfdInput, *status);

    // Apply the skeleton mapping to the NFD-normalized input.
    // The accumulated result may itself be unnormalized.
    UnicodeString mapped;
    const int32_t nfdLength = nfdInput.length();
    int32_t pos = 0;
    while (pos < nfdLength) {
        const UChar32 cp = nfdInput.char32At(pos);
        pos += U16_LENGTH(cp);
        if (u_hasBinaryProperty(cp, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
            continue;  // default-ignorable code points contribute nothing
        }
        impl->fSpoofData->confusableLookup(cp, mapped);
    }

    gNfdNormalizer->normalize(mapped, dest, *status);
    return dest;
}
798
799
// Computes the skeleton of a UTF-8 identifier into a caller-supplied UTF-8
// buffer.
//
// Rejects length < -1, destCapacity < 0, and a non-null dest with zero
// capacity (U_ILLEGAL_ARGUMENT_ERROR). A length of -1 means the input is
// measured with uprv_strlen(). Returns the UTF-8 length reported by
// u_strToUTF8(), or 0 on error before the conversion.
U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id,
                                               int32_t length, char *dest, int32_t destCapacity,
                                               UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    const bool badArguments =
        (length < -1) || (destCapacity < 0) || (dest != nullptr && destCapacity == 0);
    if (badArguments) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t byteLength =
        (length >= 0) ? length : static_cast<int32_t>(uprv_strlen(id));
    UnicodeString source = UnicodeString::fromUTF8(StringPiece(id, byteLength));
    UnicodeString skeleton;
    uspoof_getSkeletonUnicodeString(sc, type, source, skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t utf8Length = 0;
    u_strToUTF8(dest, destCapacity, &utf8Length, skeleton.getBuffer(), skeleton.length(), status);
    return utf8Length;
}
823
824
// Computes the bidi skeleton of a UTF-8 identifier into a caller-supplied
// UTF-8 buffer.
//
// Rejects length < -1 with U_ILLEGAL_ARGUMENT_ERROR. A length of -1 means the
// input is measured with uprv_strlen(). Returns the UTF-8 length reported by
// u_strToUTF8(), or 0 on error before the conversion.
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
                                                   const char *id, int32_t length, char *dest,
                                                   int32_t destCapacity, UErrorCode *status) {
    if (length < -1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t byteLength =
        (length >= 0) ? length : static_cast<int32_t>(uprv_strlen(id));
    UnicodeString source = UnicodeString::fromUTF8(StringPiece(id, byteLength));
    UnicodeString skeleton;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, source, skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t utf8Length = 0;
    u_strToUTF8(dest, destCapacity, &utf8Length, skeleton.getBuffer(), skeleton.length(), status);
    return utf8Length;
}
844
845
846
// Serializes the checker's spoof data into `buf` by delegating to
// SpoofData::serialize(). Returns that call's result, or 0 if `sc` does not
// validate (in which case *status is expected to hold an error).
U_CAPI int32_t U_EXPORT2
uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
    SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    if (impl != nullptr) {
        return impl->fSpoofData->serialize(buf, capacity, *status);
    }
    U_ASSERT(U_FAILURE(*status));
    return 0;
}
856
857
// Returns the static inclusion set as a USet, triggering the one-time static
// initialization if needed. Initialization errors are reported in *status.
// Returns nullptr when the set does not exist (initialization failed before
// or while building it) instead of dereferencing the null global.
U_CAPI const USet * U_EXPORT2
uspoof_getInclusionSet(UErrorCode *status) {
    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    if (gInclusionSet == nullptr) {
        return nullptr;
    }
    return gInclusionSet->toUSet();
}
862
863
// Returns the static recommended set as a USet, triggering the one-time static
// initialization if needed. Initialization errors are reported in *status.
// Returns nullptr when the set does not exist (initialization failed before
// or while building it) instead of dereferencing the null global.
U_CAPI const USet * U_EXPORT2
uspoof_getRecommendedSet(UErrorCode *status) {
    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    if (gRecommendedSet == nullptr) {
        return nullptr;
    }
    return gRecommendedSet->toUSet();
}
868
869
// Returns the static inclusion set, triggering the one-time static
// initialization if needed. Initialization errors are reported in *status;
// the returned pointer is whatever gInclusionSet holds afterwards.
U_I18N_API const UnicodeSet * U_EXPORT2
uspoof_getInclusionUnicodeSet(UErrorCode *status) {
    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    const UnicodeSet *inclusion = gInclusionSet;
    return inclusion;
}
874
875
// Returns the static recommended set, triggering the one-time static
// initialization if needed. Initialization errors are reported in *status;
// the returned pointer is whatever gRecommendedSet holds afterwards.
U_I18N_API const UnicodeSet * U_EXPORT2
uspoof_getRecommendedUnicodeSet(UErrorCode *status) {
    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    const UnicodeSet *recommended = gRecommendedSet;
    return recommended;
}
880
881
//------------------
882
// CheckResult APIs
883
//------------------
884
885
// Allocates a new CheckResult and returns it as an opaque handle.
// Sets U_MEMORY_ALLOCATION_ERROR and returns nullptr if allocation fails.
U_CAPI USpoofCheckResult* U_EXPORT2
uspoof_openCheckResult(UErrorCode *status) {
    CheckResult* created = new CheckResult();
    if (created != nullptr) {
        return created->asUSpoofCheckResult();
    }
    *status = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
894
895
// Destroys a check-result object. The handle is validated with a local status
// (errors are not reported to the caller) and the resulting pointer is deleted.
U_CAPI void U_EXPORT2
uspoof_closeCheckResult(USpoofCheckResult* checkResult) {
    UErrorCode localStatus = U_ZERO_ERROR;
    CheckResult* validated = CheckResult::validateThis(checkResult, localStatus);
    delete validated;
}
901
902
// Returns the failed-check bits stored in the check result, or 0 if *status
// indicates failure after validating the handle.
U_CAPI int32_t U_EXPORT2
uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) {
    const CheckResult* validated = CheckResult::validateThis(checkResult, *status);
    if (U_SUCCESS(*status)) {
        return validated->fChecks;
    }
    return 0;
}
908
909
// Returns the restriction level stored in the check result, or
// USPOOF_UNRESTRICTIVE if *status indicates failure after validating the handle.
U_CAPI URestrictionLevel U_EXPORT2
uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) {
    const CheckResult* validated = CheckResult::validateThis(checkResult, *status);
    if (U_SUCCESS(*status)) {
        return validated->fRestrictionLevel;
    }
    return USPOOF_UNRESTRICTIVE;
}
915
916
// Returns the numerics set stored in the check result as a USet (still owned
// by the check result), or nullptr if *status indicates failure after
// validating the handle.
U_CAPI const USet* U_EXPORT2
uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) {
    const CheckResult* validated = CheckResult::validateThis(checkResult, *status);
    if (U_SUCCESS(*status)) {
        return validated->fNumerics.toUSet();
    }
    return nullptr;
}
922
923
924
925
#endif // !UCONFIG_NO_NORMALIZATION
926
927