Path: blob/master/thirdparty/icu4c/i18n/unicode/uspoof.h
9912 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3***************************************************************************4* Copyright (C) 2008-2016, International Business Machines Corporation5* and others. All Rights Reserved.6***************************************************************************7* file name: uspoof.h8* encoding: UTF-89* tab size: 8 (not used)10* indentation:411*12* created on: 2008Feb1313* created by: Andy Heninger14*15* Unicode Spoof Detection16*/1718#ifndef USPOOF_H19#define USPOOF_H2021#include "unicode/ubidi.h"22#include "unicode/utypes.h"23#include "unicode/uset.h"24#include "unicode/parseerr.h"2526#if !UCONFIG_NO_NORMALIZATION272829#if U_SHOW_CPLUSPLUS_API30#include "unicode/localpointer.h"31#include "unicode/unistr.h"32#include "unicode/uniset.h"33#endif343536/**37* \file38* \brief C API: Unicode Security and Spoofing Detection39*40* <p>41* This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and42* <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:43*44* <ol>45* <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and46* "Ηarvest", where the second string starts with the Greek capital letter Eta.</li>47* <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof48* detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>49* </ol>50*51* <p>52* Although originally designed as a method for flagging suspicious identifier strings such as URLs,53* <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word54* content filters.55*56* <p>57* The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.58*59* <h2>Confusables</h2>60*61* <p>62* The 
* following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
 *
 * \code{.c}
 * UErrorCode status = U_ZERO_ERROR;
 * UChar* str1 = (UChar*) u"Harvest";
 * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
 *
 * USpoofChecker* sc = uspoof_open(&status);
 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
 *
 * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
 * UBool result = bitmask != 0;
 * // areConfusable: 1 (status: U_ZERO_ERROR)
 * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
 * uspoof_close(sc);
 * \endcode
 *
 * <p>
 * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
 * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
 * confusability test; and the following line extracts the result out of the return value. For best performance,
 * the instance should be created once (e.g., upon application startup), and the efficient
 * {@link uspoof_areConfusable} method can be used at runtime.
 *
 * If the paragraph direction used to display the strings is known, the bidi function should be used instead:
 *
 * \code{.c}
 * UErrorCode status = U_ZERO_ERROR;
 * // These strings look identical when rendered in a left-to-right context.
 * // They look distinct in a right-to-left context.
 * UChar* str1 = (UChar*) u"A1\u05D0";  // A1א
 * UChar* str2 = (UChar*) u"A\u05D01";  // Aא1
 *
 * USpoofChecker* sc = uspoof_open(&status);
 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
 *
 * int32_t bitmask = uspoof_areBidiConfusable(sc, UBIDI_LTR, str1, -1, str2, -1, &status);
 * UBool result = bitmask != 0;
 * // areBidiConfusable: 1 (status: U_ZERO_ERROR)
 * printf("areBidiConfusable: %d (status: %s)\n", result, u_errorName(status));
 * uspoof_close(sc);
 * \endcode
 *
 * <p>
 * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call
 * {@link uspoof_close} when the object goes out of scope:
 *
 * \code{.cpp}
 * UErrorCode status = U_ZERO_ERROR;
 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
 * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
 * // ...
 * \endcode
 *
 * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
 * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
 * the following snippet is equivalent to the example above:
 *
 * \code{.c}
 * UErrorCode status = U_ZERO_ERROR;
 * UChar* str1 = (UChar*) u"Harvest";
 * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
 *
 * USpoofChecker* sc = uspoof_open(&status);
 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
 *
 * // Get skeleton 1
 * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
 * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
 * status = U_ZERO_ERROR;
 * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
 *
 * // Get skeleton 2
 * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
 * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
 * status = U_ZERO_ERROR;
 * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
 *
 * // Are the skeletons the same?
 * UBool result = u_strcmp(skel1, skel2) == 0;
 * // areConfusable: 1 (status: U_ZERO_ERROR)
 * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
 * uspoof_close(sc);
 * free(skel1);
 * free(skel2);
 * \endcode
 *
 * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
 * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
 *
 * \code{.c}
 * UErrorCode status = U_ZERO_ERROR;
 * #define DICTIONARY_LENGTH 2
 * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
 * UChar* skeletons[DICTIONARY_LENGTH];
 * UChar* str = (UChar*) u"1orern";
 *
 * // Setup:
 * USpoofChecker* sc = uspoof_open(&status);
 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
 * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
 *     UChar* word = dictionary[i];
 *     int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
 *     skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
 *     status = U_ZERO_ERROR;
 *     uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
 * }
 *
 * // Live Check:
 * {
 *     int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
 *     UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
 *     status = U_ZERO_ERROR;
 *     uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
 *     UBool result = false;
 *     for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
 *         result = u_strcmp(skel, skeletons[i]) == 0;
 *         if (result == true) { break; }
 *     }
 *     // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
 *     printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
 *     free(skel);
 * }
 *
 * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
 *     free(skeletons[i]);
 * }
 * uspoof_close(sc);
 * \endcode
 *
 * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
 * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
 * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
 *
 * <h2>Spoof Detection</h2>
 *
 * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
 * string:
 *
 * \code{.c}
 * UErrorCode status = U_ZERO_ERROR;
 * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
 *
 * // Get the default set of allowable characters:
 * USet* allowed = uset_openEmpty();
 * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
 * uset_addAll(allowed, uspoof_getInclusionSet(&status));
 *
 * USpoofChecker* sc = uspoof_open(&status);
 * uspoof_setAllowedChars(sc, allowed, &status);
 * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
 *
 * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
 * UBool result = bitmask != 0;
 * // fails checks: 1 (status: U_ZERO_ERROR)
 * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
 * uspoof_close(sc);
 * uset_close(allowed);
 * \endcode
 *
 * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
 * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
 * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
 *
 * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
 * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
 *
 * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
 * is available in the returned bitmask. For complete information, use the {@link uspoof_check2} class of functions
 * with a {@link USpoofCheckResult} parameter:
 *
 * \code{.c}
 * UErrorCode status = U_ZERO_ERROR;
 * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
 *
 * // Get the default set of allowable characters:
 * USet* allowed = uset_openEmpty();
 * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
 * uset_addAll(allowed, uspoof_getInclusionSet(&status));
 *
 * USpoofChecker* sc = uspoof_open(&status);
 * uspoof_setAllowedChars(sc, allowed, &status);
 * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
 *
 * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
 * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
 *
 * int32_t failures1 = bitmask;
 * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
 * assert(failures1 == failures2);
 * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
 * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
 *
 * // Cleanup:
 * uspoof_close(sc);
 * uset_close(allowed);
 * uspoof_closeCheckResult(checkResult);
 * \endcode
 *
 * C++ users can take advantage of a few syntactical conveniences. The following snippet is functionally
 * equivalent to the one above:
 *
 * \code{.cpp}
 * UErrorCode status = U_ZERO_ERROR;
 * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
 *
 * // Get the default set of allowable characters:
 * UnicodeSet allowed;
 * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
 * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
 *
 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
 * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
 * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
 *
 * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
 * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
 *
 * int32_t failures1 = bitmask;
 * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
 * assert(failures1 == failures2);
 * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
 * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
 *
 * // Explicit cleanup not necessary.
 * \endcode
 *
 * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
 * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
 *
 * <ul>
 * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
 * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
 * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
 * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
 * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
 * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
 * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
 * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
 * </ul>
 *
 * <p>
 * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
 * INVISIBLE and MIXED_NUMBERS conditions, you could do:
 *
 * \code{.c}
 * UErrorCode status = U_ZERO_ERROR;
 * UChar* str = (UChar*) u"8\u09EA";  // 8 mixed with U+09EA BENGALI DIGIT FOUR
 *
 * USpoofChecker* sc = uspoof_open(&status);
 * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
 *
 * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
 * UBool result = bitmask != 0;
 * // fails checks: 1 (status: U_ZERO_ERROR)
 * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
 * uspoof_close(sc);
 * \endcode
 *
 * Here is an example in C++ showing how to compute the restriction level of a string:
 *
 * \code{.cpp}
 * UErrorCode status = U_ZERO_ERROR;
 * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
 *
 * // Get the default set of allowable characters:
 * UnicodeSet allowed;
 * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
 * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
 *
 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
 * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
 * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
 * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
 *
 * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
 * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
 *
 * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
 * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
 * assert((restrictionLevel & bitmask) == restrictionLevel);
 * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
 * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
 * \endcode
 *
 * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE. Since
 * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
 *
 * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
 * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
 * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
 * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
 * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
 * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
 * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
 * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
 * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
 * scripts.
 *
 * <h2>Advanced bidirectional usage</h2>
 * If the paragraph direction with which the identifiers will be displayed is not known, there are
 * multiple options for confusable detection depending on the circumstances.
 *
 * <p>
 * In some circumstances, the only concern is confusion between identifiers displayed with the same
 * paragraph direction.
 *
 * <p>
 * An example is the case where identifiers are usernames prefixed with the @ symbol.
 * That symbol will appear to the left in a left-to-right context, and to the right in a
 * right-to-left context, so that an identifier displayed in a left-to-right context can never be
 * confused with an identifier displayed in a right-to-left context:
 * <ul>
 * <li>
 * The usernames "A1א" (A one aleph) and "Aא1" (A aleph 1)
 * would be considered confusable, since they both appear as \@A1א in a left-to-right context, and the
 * usernames "אA_1" (aleph A underscore one) and "א1_A" (aleph one underscore A) would be considered
 * confusable, since they both appear as A_1א@ in a right-to-left context.
 * </li>
 * <li>
 * The username "Mark_" would not be considered confusable with the username "_Mark",
 * even though the latter would appear as Mark_@ in a right-to-left context, and the
 * former as \@Mark_ in a left-to-right context.
 * </li>
 * </ul>
 * <p>
 * In that case, the caller should check for both LTR-confusability and RTL-confusability:
 *
 * \code{.cpp}
 * bool confusableInEitherDirection =
 *     uspoof_areBidiConfusableUnicodeString(sc, UBIDI_LTR, id1, id2, &status) ||
 *     uspoof_areBidiConfusableUnicodeString(sc, UBIDI_RTL, id1, id2, &status);
 * \endcode
 *
 * If the bidiSkeleton is used, the LTR and RTL skeleta should be kept separately and compared, LTR
 * with LTR and RTL with RTL.
 *
 * <p>
 * In cases where confusability between the visual appearances of an identifier displayed in a
 * left-to-right context with another identifier displayed in a right-to-left context is a concern,
 * the LTR skeleton of one can be compared with the RTL skeleton of the other. However, this
 * very broad definition of confusability may have unexpected results; for instance, it treats the
 * ASCII identifiers "Mark_" and "_Mark" as confusable.
 *
 * <h2>Additional Information</h2>
 *
 * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
 *
 * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
 * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
 * using the same USpoofChecker instance.
 *
 * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
 * thread safe.
Those that take a non-const USpoofChecker are not thread safe..416*417* @stable ICU 4.6418*/419420U_CDECL_BEGIN421422struct USpoofChecker;423/**424* @stable ICU 4.2425*/426typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker */427428struct USpoofCheckResult;429/**430* @see uspoof_openCheckResult431* @stable ICU 58432*/433typedef struct USpoofCheckResult USpoofCheckResult;434435/**436* Enum for the kinds of checks that USpoofChecker can perform.437* These enum values are used both to select the set of checks that438* will be performed, and to report results from the check function.439*440* @stable ICU 4.2441*/442typedef enum USpoofChecks {443/**444* When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates445* that the two strings are visually confusable and that they are from the same script, according to UTS 39 section446* 4.447*448* @see uspoof_areConfusable449* @stable ICU 4.2450*/451USPOOF_SINGLE_SCRIPT_CONFUSABLE = 1,452453/**454* When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates455* that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS456* 39 section 4.457*458* @see uspoof_areConfusable459* @stable ICU 4.2460*/461USPOOF_MIXED_SCRIPT_CONFUSABLE = 2,462463/**464* When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates465* that the two strings are visually confusable and that they are not from the same script but both of them are466* single-script strings, according to UTS 39 section 4.467*468* @see uspoof_areConfusable469* @stable ICU 4.2470*/471USPOOF_WHOLE_SCRIPT_CONFUSABLE = 4,472473/**474* Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables. 
You may set475* the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to476* make {@link uspoof_areConfusable} return only those types of confusables.477*478* @see uspoof_areConfusable479* @see uspoof_getSkeleton480* @stable ICU 58481*/482USPOOF_CONFUSABLE = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,483484#ifndef U_HIDE_DEPRECATED_API485/**486* This flag is deprecated and no longer affects the behavior of SpoofChecker.487*488* @deprecated ICU 58 Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.489*/490USPOOF_ANY_CASE = 8,491#endif /* U_HIDE_DEPRECATED_API */492493/**494* Check that an identifier is no looser than the specified RestrictionLevel.495* The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.496*497* If USPOOF_AUX_INFO is enabled the actual restriction level of the498* identifier being tested will also be returned by uspoof_check().499*500* @see URestrictionLevel501* @see uspoof_setRestrictionLevel502* @see USPOOF_AUX_INFO503*504* @stable ICU 51505*/506USPOOF_RESTRICTION_LEVEL = 16,507508#ifndef U_HIDE_DEPRECATED_API509/** Check that an identifier contains only characters from a510* single script (plus chars from the common and inherited scripts.)511* Applies to checks of a single identifier check only.512* @deprecated ICU 51 Use RESTRICTION_LEVEL instead.513*/514USPOOF_SINGLE_SCRIPT = USPOOF_RESTRICTION_LEVEL,515#endif /* U_HIDE_DEPRECATED_API */516517/** Check an identifier for the presence of invisible characters,518* such as zero-width spaces, or character sequences that are519* likely not to display, such as multiple occurrences of the same520* non-spacing mark. 
This check does not test the input string as a whole521* for conformance to any particular syntax for identifiers.522*/523USPOOF_INVISIBLE = 32,524525/** Check that an identifier contains only characters from a specified set526* of acceptable characters. See {@link uspoof_setAllowedChars} and527* {@link uspoof_setAllowedLocales}. Note that a string that fails this check528* will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.529*/530USPOOF_CHAR_LIMIT = 64,531532/**533* Check that an identifier does not mix numbers from different numbering systems.534* For more information, see UTS 39 section 5.3.535*536* @stable ICU 51537*/538USPOOF_MIXED_NUMBERS = 128,539540/**541* Check that an identifier does not have a combining character following a character in which that542* combining character would be hidden; for example 'i' followed by a U+0307 combining dot.543*544* More specifically, the following characters are forbidden from preceding a U+0307:545* <ul>546* <li>Those with the Soft_Dotted Unicode property (which includes 'i' and 'j')</li>547* <li>Latin lowercase letter 'l'</li>548* <li>Dotless 'i' and 'j' ('ı' and 'ȷ', U+0131 and U+0237)</li>549* <li>Any character whose confusable prototype ends with such a character550* (Soft_Dotted, 'l', 'ı', or 'ȷ')</li>551* </ul>552* In addition, combining characters are allowed between the above characters and U+0307 except those553* with combining class 0 or combining class "Above" (230, same class as U+0307).554*555* This list and the number of combing characters considered by this check may grow over time.556*557* @stable ICU 62558*/559USPOOF_HIDDEN_OVERLAY = 256,560561/**562* Enable all spoof checks.563*564* @stable ICU 4.6565*/566USPOOF_ALL_CHECKS = 0xFFFF,567568/**569* Enable the return of auxiliary (non-error) information in the570* upper bits of the check results value.571*572* If this "check" is not enabled, the results of {@link uspoof_check} will be573* zero when an identifier passes all of the enabled 
checks.574*575* If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will576* be zero when an identifier passes all checks.577*578* @stable ICU 51579*/580USPOOF_AUX_INFO = 0x40000000581582} USpoofChecks;583584585/**586* Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and587* for returned identifier restriction levels in check results.588*589* @stable ICU 51590*591* @see uspoof_setRestrictionLevel592* @see uspoof_check593*/594typedef enum URestrictionLevel {595/**596* All characters in the string are in the identifier profile and all characters in the string are in the597* ASCII range.598*599* @stable ICU 51600*/601USPOOF_ASCII = 0x10000000,602/**603* The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and604* the string is single-script, according to the definition in UTS 39 section 5.1.605*606* @stable ICU 53607*/608USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,609/**610* The string classifies as Single Script, or all characters in the string are in the identifier profile and611* the string is covered by any of the following sets of scripts, according to the definition in UTS 39612* section 5.1:613* <ul>614* <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>615* <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>616* <li>Latin + Han + Hangul (or equivalently: Latn +Kore)</li>617* </ul>618* This is the default restriction in ICU.619*620* @stable ICU 51621*/622USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,623/**624* The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile625* and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,626* Greek, and Cherokee.627*628* @stable ICU 51629*/630USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,631/**632* All characters in the string are in the identifier profile. 
Allow arbitrary mixtures of scripts.633*634* @stable ICU 51635*/636USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,637/**638* Any valid identifiers, including characters outside of the Identifier Profile.639*640* @stable ICU 51641*/642USPOOF_UNRESTRICTIVE = 0x60000000,643/**644* Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.645*646* @stable ICU 53647*/648USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,649#ifndef U_HIDE_INTERNAL_API650/**651* An undefined restriction level.652* @internal653*/654USPOOF_UNDEFINED_RESTRICTIVE = -1655#endif /* U_HIDE_INTERNAL_API */656} URestrictionLevel;657658/**659* Create a Unicode Spoof Checker, configured to perform all660* checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.661* Note that additional checks may be added in the future,662* resulting in the changes to the default checking behavior.663*664* @param status The error code, set if this function encounters a problem.665* @return the newly created Spoof Checker666* @stable ICU 4.2667*/668U_CAPI USpoofChecker * U_EXPORT2669uspoof_open(UErrorCode *status);670671672/**673* Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.674* Inverse of uspoof_serialize().675* The memory containing the serialized data must remain valid and unchanged676* as long as the spoof checker, or any cloned copies of the spoof checker,677* are in use. 
Ownership of the memory remains with the caller.678* The spoof checker (and any clones) must be closed prior to deleting the679* serialized data.680*681* @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data682* @param length the number of bytes available at data;683* can be more than necessary684* @param pActualLength receives the actual number of bytes at data taken up by the data;685* can be NULL686* @param pErrorCode ICU error code687* @return the spoof checker.688*689* @see uspoof_open690* @see uspoof_serialize691* @stable ICU 4.2692*/693U_CAPI USpoofChecker * U_EXPORT2694uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,695UErrorCode *pErrorCode);696697/**698* Open a Spoof Checker from the source form of the spoof data.699* The input corresponds to the Unicode data file confusables.txt700* as described in Unicode Technical Standard #39. The syntax of the source data701* is as described in UTS #39 for this file, and the content of702* this file is acceptable input.703*704* The character encoding of the (char *) input text is UTF-8.705*706* @param confusables a pointer to the confusable characters definitions,707* as found in file confusables.txt from unicode.org.708* @param confusablesLen The length of the confusables text, or -1 if the709* input string is zero terminated.710* @param confusablesWholeScript711* Deprecated in ICU 58. No longer used.712* @param confusablesWholeScriptLen713* Deprecated in ICU 58. No longer used.714* @param errType In the event of an error in the input, indicates715* which of the input files contains the error.716* The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or717* USPOOF_WHOLE_SCRIPT_CONFUSABLE, or718* zero if no errors are found.719* @param pe In the event of an error in the input, receives the position720* in the input text (line, offset) of the error.721* @param status an in/out ICU UErrorCode. 
Among the possible errors is722* U_PARSE_ERROR, which is used to report syntax errors723* in the input.724* @return A spoof checker that uses the rules from the input files.725* @stable ICU 4.2726*/727U_CAPI USpoofChecker * U_EXPORT2728uspoof_openFromSource(const char *confusables, int32_t confusablesLen,729const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,730int32_t *errType, UParseError *pe, UErrorCode *status);731732733/**734* Close a Spoof Checker, freeing any memory that was being held by735* its implementation.736* @stable ICU 4.2737*/738U_CAPI void U_EXPORT2739uspoof_close(USpoofChecker *sc);740741/**742* Clone a Spoof Checker. The clone will be set to perform the same checks743* as the original source.744*745* @param sc The source USpoofChecker746* @param status The error code, set if this function encounters a problem.747* @return748* @stable ICU 4.2749*/750U_CAPI USpoofChecker * U_EXPORT2751uspoof_clone(const USpoofChecker *sc, UErrorCode *status);752753754/**755* Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method756* overwrites any checks that may have already been enabled. 
By default, all checks are enabled.757*758* To enable specific checks and disable all others,759* OR together only the bit constants for the desired checks.760* For example, to fail strings containing characters outside of761* the set specified by {@link uspoof_setAllowedChars} and762* also strings that contain digits from mixed numbering systems:763*764* <pre>765* {@code766* uspoof_setChecks(USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS);767* }768* </pre>769*770* To disable specific checks and enable all others,771* start with ALL_CHECKS and "AND away" the not-desired checks.772* For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,773* it is good practice to disable the CONFUSABLE check:774*775* <pre>776* {@code777* uspoof_setChecks(USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE);778* }779* </pre>780*781* Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and782* {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they783* enable onto the existing bitmask specified by this method. 
For more details, see the documentation of those784* methods.785*786* @param sc The USpoofChecker787* @param checks The set of checks that this spoof checker will perform.788* The value is a bit set, obtained by OR-ing together789* values from enum USpoofChecks.790* @param status The error code, set if this function encounters a problem.791* @stable ICU 4.2792*793*/794U_CAPI void U_EXPORT2795uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);796797/**798* Get the set of checks that this Spoof Checker has been configured to perform.799*800* @param sc The USpoofChecker801* @param status The error code, set if this function encounters a problem.802* @return The set of checks that this spoof checker will perform.803* The value is a bit set, obtained by OR-ing together804* values from enum USpoofChecks.805* @stable ICU 4.2806*807*/808U_CAPI int32_t U_EXPORT2809uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);810811/**812* Set the loosest restriction level allowed for strings. The default if this is not called is813* {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and814* {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.1 and 5.2 of UTS 39. 
To customize which checks are815* to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.816*817* @param sc The USpoofChecker818* @param restrictionLevel The loosest restriction level allowed.819* @see URestrictionLevel820* @stable ICU 51821*/822U_CAPI void U_EXPORT2823uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);824825826/**827* Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.828*829* @return The restriction level830* @see URestrictionLevel831* @stable ICU 51832*/833U_CAPI URestrictionLevel U_EXPORT2834uspoof_getRestrictionLevel(const USpoofChecker *sc);835836/**837* Limit characters that are acceptable in identifiers being checked to those838* normally used with the languages associated with the specified locales.839* Any previously specified list of locales is replaced by the new settings.840*841* A set of languages is determined from the locale(s), and842* from those a set of acceptable Unicode scripts is determined.843* Characters from this set of scripts, along with characters from844* the "common" and "inherited" Unicode Script categories845* will be permitted.846*847* Supplying an empty string removes all restrictions;848* characters from any script will be allowed.849*850* The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this851* USpoofChecker when calling this function with a non-empty list852* of locales.853*854* The Unicode Set of characters that will be allowed is accessible855* via the uspoof_getAllowedChars() function. 
uspoof_setAllowedLocales()
 * will <i>replace</i> any previously applied set of allowed characters.
 *
 * Adjustments, such as additions or deletions of certain classes of characters,
 * can be made to the result of uspoof_setAllowedLocales() by
 * fetching the resulting set with uspoof_getAllowedChars(),
 * manipulating it with the Unicode Set API, then resetting the
 * spoof detector's limits with uspoof_setAllowedChars().
 *
 * @param sc           The USpoofChecker
 * @param localesList  A list of locales, from which the language
 *                     and associated script are extracted. The locales
 *                     are comma-separated if there is more than one.
 *                     White space may not appear within an individual locale,
 *                     but is ignored otherwise.
 *                     The locales are syntactically like those from the
 *                     HTTP Accept-Language header.
 *                     If the localesList is empty, no restrictions will be placed on
 *                     the allowed characters.
 *
 * @param status       The error code, set if this function encounters a problem.
 * @stable ICU 4.2
 */
U_CAPI void U_EXPORT2
uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);

/**
 * Get a list of locales for the scripts that are acceptable in strings
 * to be checked. 
If no limitations on scripts have been specified,
 * an empty string will be returned.
 *
 * uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
 *
 * The format of the returned list is the same as that supplied to
 * uspoof_setAllowedLocales(), but the returned list may not be identical
 * to the originally specified string; the string may be reformatted,
 * and information other than languages from
 * the originally specified locales may be omitted.
 *
 * @param sc       The USpoofChecker
 * @param status   The error code, set if this function encounters a problem.
 * @return         A string containing a list of locales corresponding
 *                 to the acceptable scripts, formatted like an
 *                 HTTP Accept-Language value.
 *
 * @stable ICU 4.2
 */
U_CAPI const char * U_EXPORT2
uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);


/**
 * Limit the acceptable characters to those specified by a Unicode Set.
 * Any previously specified character limit is
 * replaced by the new settings. This includes limits on
 * characters that were set with the uspoof_setAllowedLocales() function.
 *
 * The USPOOF_CHAR_LIMIT test is automatically enabled for this
 * USpoofChecker by this function.
 *
 * @param sc       The USpoofChecker
 * @param chars    A Unicode Set containing the list of
 *                 characters that are permitted. Ownership of the set
 *                 remains with the caller. The incoming set is cloned by
 *                 this function, so there are no restrictions on modifying
 *                 or deleting the USet after calling this function.
 * @param status   The error code, set if this function encounters a problem.
 * @stable ICU 4.2
 */
U_CAPI void U_EXPORT2
uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);


/**
 * Get a USet for the characters permitted in an identifier.
 * This corresponds to the limits imposed by the Set Allowed Characters
 * functions. 
Limitations imposed by other checks will not be932* reflected in the set returned by this function.933*934* The returned set will be frozen, meaning that it cannot be modified935* by the caller.936*937* Ownership of the returned set remains with the Spoof Detector. The938* returned set will become invalid if the spoof detector is closed,939* or if a new set of allowed characters is specified.940*941*942* @param sc The USpoofChecker943* @param status The error code, set if this function encounters a problem.944* @return A USet containing the characters that are permitted by945* the USPOOF_CHAR_LIMIT test.946* @stable ICU 4.2947*/948U_CAPI const USet * U_EXPORT2949uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);950951952/**953* Check the specified string for possible security issues.954* The text to be checked will typically be an identifier of some sort.955* The set of checks to be performed is specified with uspoof_setChecks().956*957* \note958* Consider using the newer API, {@link uspoof_check2}, instead.959* The newer API exposes additional information from the check procedure960* and is otherwise identical to this method.961*962* @param sc The USpoofChecker963* @param id The identifier to be checked for possible security issues,964* in UTF-16 format.965* @param length the length of the string to be checked, expressed in966* 16 bit UTF-16 code units, or -1 if the string is967* zero terminated.968* @param position Deprecated in ICU 51. Always returns zero.969* Originally, an out parameter for the index of the first970* string position that failed a check.971* This parameter may be NULL.972* @param status The error code, set if an error occurred while attempting to973* perform the check.974* Spoofing or security issues detected with the input string are975* not reported here, but through the function's return value.976* @return An integer value with bits set for any potential security977* or spoofing issues detected. 
The bits are defined by978* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)979* will be zero if the input string passes all of the980* enabled checks.981* @see uspoof_check2982* @stable ICU 4.2983*/984U_CAPI int32_t U_EXPORT2985uspoof_check(const USpoofChecker *sc,986const UChar *id, int32_t length,987int32_t *position,988UErrorCode *status);989990991/**992* Check the specified string for possible security issues.993* The text to be checked will typically be an identifier of some sort.994* The set of checks to be performed is specified with uspoof_setChecks().995*996* \note997* Consider using the newer API, {@link uspoof_check2UTF8}, instead.998* The newer API exposes additional information from the check procedure999* and is otherwise identical to this method.1000*1001* @param sc The USpoofChecker1002* @param id A identifier to be checked for possible security issues, in UTF8 format.1003* @param length the length of the string to be checked, or -1 if the string is1004* zero terminated.1005* @param position Deprecated in ICU 51. Always returns zero.1006* Originally, an out parameter for the index of the first1007* string position that failed a check.1008* This parameter may be NULL.1009* @param status The error code, set if an error occurred while attempting to1010* perform the check.1011* Spoofing or security issues detected with the input string are1012* not reported here, but through the function's return value.1013* If the input contains invalid UTF-8 sequences,1014* a status of U_INVALID_CHAR_FOUND will be returned.1015* @return An integer value with bits set for any potential security1016* or spoofing issues detected. The bits are defined by1017* enum USpoofChecks. 
(returned_value & USPOOF_ALL_CHECKS)1018* will be zero if the input string passes all of the1019* enabled checks.1020* @see uspoof_check2UTF81021* @stable ICU 4.21022*/1023U_CAPI int32_t U_EXPORT21024uspoof_checkUTF8(const USpoofChecker *sc,1025const char *id, int32_t length,1026int32_t *position,1027UErrorCode *status);102810291030/**1031* Check the specified string for possible security issues.1032* The text to be checked will typically be an identifier of some sort.1033* The set of checks to be performed is specified with uspoof_setChecks().1034*1035* @param sc The USpoofChecker1036* @param id The identifier to be checked for possible security issues,1037* in UTF-16 format.1038* @param length the length of the string to be checked, or -1 if the string is1039* zero terminated.1040* @param checkResult An instance of USpoofCheckResult to be filled with1041* details about the identifier. Can be NULL.1042* @param status The error code, set if an error occurred while attempting to1043* perform the check.1044* Spoofing or security issues detected with the input string are1045* not reported here, but through the function's return value.1046* @return An integer value with bits set for any potential security1047* or spoofing issues detected. The bits are defined by1048* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)1049* will be zero if the input string passes all of the1050* enabled checks. 
Any information in this bitmask will be1051* consistent with the information saved in the optional1052* checkResult parameter.1053* @see uspoof_openCheckResult1054* @see uspoof_check2UTF81055* @see uspoof_check2UnicodeString1056* @stable ICU 581057*/1058U_CAPI int32_t U_EXPORT21059uspoof_check2(const USpoofChecker *sc,1060const UChar* id, int32_t length,1061USpoofCheckResult* checkResult,1062UErrorCode *status);10631064/**1065* Check the specified string for possible security issues.1066* The text to be checked will typically be an identifier of some sort.1067* The set of checks to be performed is specified with uspoof_setChecks().1068*1069* This version of {@link uspoof_check} accepts a USpoofCheckResult, which1070* returns additional information about the identifier. For more1071* information, see {@link uspoof_openCheckResult}.1072*1073* @param sc The USpoofChecker1074* @param id A identifier to be checked for possible security issues, in UTF8 format.1075* @param length the length of the string to be checked, or -1 if the string is1076* zero terminated.1077* @param checkResult An instance of USpoofCheckResult to be filled with1078* details about the identifier. Can be NULL.1079* @param status The error code, set if an error occurred while attempting to1080* perform the check.1081* Spoofing or security issues detected with the input string are1082* not reported here, but through the function's return value.1083* @return An integer value with bits set for any potential security1084* or spoofing issues detected. The bits are defined by1085* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)1086* will be zero if the input string passes all of the1087* enabled checks. 
Any information in this bitmask will be1088* consistent with the information saved in the optional1089* checkResult parameter.1090* @see uspoof_openCheckResult1091* @see uspoof_check21092* @see uspoof_check2UnicodeString1093* @stable ICU 581094*/1095U_CAPI int32_t U_EXPORT21096uspoof_check2UTF8(const USpoofChecker *sc,1097const char *id, int32_t length,1098USpoofCheckResult* checkResult,1099UErrorCode *status);11001101/**1102* Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return1103* information about the identifier. Information includes:1104* <ul>1105* <li>A bitmask of the checks that failed</li>1106* <li>The identifier's restriction level (UTS 39 section 5.2)</li>1107* <li>The set of numerics in the string (UTS 39 section 5.3)</li>1108* </ul>1109* The data held in a USpoofCheckResult is cleared whenever it is passed into a new call1110* of {@link uspoof_check2}.1111*1112* @param status The error code, set if this function encounters a problem.1113* @return the newly created USpoofCheckResult1114* @see uspoof_check21115* @see uspoof_check2UTF81116* @see uspoof_check2UnicodeString1117* @stable ICU 581118*/1119U_CAPI USpoofCheckResult* U_EXPORT21120uspoof_openCheckResult(UErrorCode *status);11211122/**1123* Close a USpoofCheckResult, freeing any memory that was being held by1124* its implementation.1125*1126* @param checkResult The instance of USpoofCheckResult to close1127* @stable ICU 581128*/1129U_CAPI void U_EXPORT21130uspoof_closeCheckResult(USpoofCheckResult *checkResult);11311132/**1133* Indicates which of the spoof check(s) have failed. 
The value is a bitwise OR of the constants for the tests1134* in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.1135*1136* @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}1137* @param status The error code, set if an error occurred.1138* @return An integer value with bits set for any potential security1139* or spoofing issues detected. The bits are defined by1140* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)1141* will be zero if the input string passes all of the1142* enabled checks.1143* @see uspoof_setChecks1144* @stable ICU 581145*/1146U_CAPI int32_t U_EXPORT21147uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);11481149/**1150* Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check1151* was enabled; otherwise, undefined.1152*1153* @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}1154* @param status The error code, set if an error occurred.1155* @return The restriction level contained in the USpoofCheckResult1156* @see uspoof_setRestrictionLevel1157* @stable ICU 581158*/1159U_CAPI URestrictionLevel U_EXPORT21160uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);11611162/**1163* Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;1164* otherwise, undefined. The set will contain the zero digit from each decimal number system found1165* in the input string. 
Ownership of the returned USet remains with the USpoofCheckResult.
 * The USet will be freed when {@link uspoof_closeCheckResult} is called.
 *
 * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
 * @param status       The error code, set if an error occurred.
 * @return             The set of numerics contained in the USpoofCheckResult
 * @stable ICU 58
 */
U_CAPI const USet* U_EXPORT2
uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);


/**
 * Check whether two specified strings are visually confusable.
 *
 * If the strings are confusable, the return value will be nonzero, as long as
 * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
 *
 * The bits in the return value correspond to flags for each of the classes of
 * confusables applicable to the two input strings. According to UTS 39
 * section 4, the possible flags are:
 *
 * <ul>
 *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
 *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
 *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
 * </ul>
 *
 * If one or more of the above flags were not listed in uspoof_setChecks(), this
 * function will never report that class of confusable. The check
 * {@link USPOOF_CONFUSABLE} enables all three flags.
 *
 *
 * @param sc      The USpoofChecker
 * @param id1     The first of the two identifiers to be compared for
 *                confusability. The strings are in UTF-16 format.
 * @param length1 the length of the first identifier, expressed in
 *                16 bit UTF-16 code units, or -1 if the string is
 *                nul terminated.
 * @param id2     The second of the two identifiers to be compared for
 *                confusability. 
The identifiers are in UTF-16 format.1206* @param length2 The length of the second identifiers, expressed in1207* 16 bit UTF-16 code units, or -1 if the string is1208* nul terminated.1209* @param status The error code, set if an error occurred while attempting to1210* perform the check.1211* Confusability of the identifiers is not reported here,1212* but through this function's return value.1213* @return An integer value with bit(s) set corresponding to1214* the type of confusability found, as defined by1215* enum USpoofChecks. Zero is returned if the identifiers1216* are not confusable.1217*1218* @stable ICU 4.21219*/1220U_CAPI int32_t U_EXPORT21221uspoof_areConfusable(const USpoofChecker *sc,1222const UChar *id1, int32_t length1,1223const UChar *id2, int32_t length2,1224UErrorCode *status);12251226/**1227* Check whether two specified strings are visually confusable when1228* displayed in a context with the given paragraph direction.1229*1230* If the strings are confusable, the return value will be nonzero, as long as1231* {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().1232*1233* The bits in the return value correspond to flags for each of the classes of1234* confusables applicable to the two input strings. According to UTS 391235* section 4, the possible flags are:1236*1237* <ul>1238* <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>1239* <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>1240* <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>1241* </ul>1242*1243* If one or more of the above flags were not listed in uspoof_setChecks(), this1244* function will never report that class of confusable. The check1245* {@link USPOOF_CONFUSABLE} enables all three flags.1246*1247*1248* @param sc The USpoofChecker1249* @param direction The paragraph direction with which the identifiers are1250* displayed. Must be either UBIDI_LTR or UBIDI_RTL.1251* @param id1 The first of the two identifiers to be compared for1252* confusability. 
The strings are in UTF-16 format.1253* @param length1 the length of the first identifier, expressed in1254* 16 bit UTF-16 code units, or -1 if the string is1255* nul terminated.1256* @param id2 The second of the two identifiers to be compared for1257* confusability. The identifiers are in UTF-16 format.1258* @param length2 The length of the second identifiers, expressed in1259* 16 bit UTF-16 code units, or -1 if the string is1260* nul terminated.1261* @param status The error code, set if an error occurred while attempting to1262* perform the check.1263* Confusability of the identifiers is not reported here,1264* but through this function's return value.1265* @return An integer value with bit(s) set corresponding to1266* the type of confusability found, as defined by1267* enum USpoofChecks. Zero is returned if the identifiers1268* are not confusable.1269*1270* @stable ICU 741271*/1272U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,1273const UChar *id1, int32_t length1,1274const UChar *id2, int32_t length2,1275UErrorCode *status);12761277/**1278* A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.1279*1280* @param sc The USpoofChecker1281* @param id1 The first of the two identifiers to be compared for1282* confusability. The strings are in UTF-8 format.1283* @param length1 the length of the first identifiers, in bytes, or -11284* if the string is nul terminated.1285* @param id2 The second of the two identifiers to be compared for1286* confusability. 
The strings are in UTF-8 format.1287* @param length2 The length of the second string in bytes, or -11288* if the string is nul terminated.1289* @param status The error code, set if an error occurred while attempting to1290* perform the check.1291* Confusability of the strings is not reported here,1292* but through this function's return value.1293* @return An integer value with bit(s) set corresponding to1294* the type of confusability found, as defined by1295* enum USpoofChecks. Zero is returned if the strings1296* are not confusable.1297*1298* @stable ICU 4.21299*1300* @see uspoof_areConfusable1301*/1302U_CAPI int32_t U_EXPORT21303uspoof_areConfusableUTF8(const USpoofChecker *sc,1304const char *id1, int32_t length1,1305const char *id2, int32_t length2,1306UErrorCode *status);13071308/**1309* A version of {@link uspoof_areBidiConfusable} accepting strings in UTF-8 format.1310*1311* @param sc The USpoofChecker1312* @param direction The paragraph direction with which the identifiers are1313* displayed. Must be either UBIDI_LTR or UBIDI_RTL.1314* @param id1 The first of the two identifiers to be compared for1315* confusability. The strings are in UTF-8 format.1316* @param length1 the length of the first identifiers, in bytes, or -11317* if the string is nul terminated.1318* @param id2 The second of the two identifiers to be compared for1319* confusability. The strings are in UTF-8 format.1320* @param length2 The length of the second string in bytes, or -11321* if the string is nul terminated.1322* @param status The error code, set if an error occurred while attempting to1323* perform the check.1324* Confusability of the strings is not reported here,1325* but through this function's return value.1326* @return An integer value with bit(s) set corresponding to1327* the type of confusability found, as defined by1328* enum USpoofChecks. 
Zero is returned if the strings1329* are not confusable.1330*1331* @stable ICU 741332*1333* @see uspoof_areBidiConfusable1334*/1335U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,1336const char *id1, int32_t length1,1337const char *id2, int32_t length2,1338UErrorCode *status);13391340/**1341* Get the "skeleton" for an identifier.1342* Skeletons are a transformation of the input identifier;1343* Two identifiers are confusable if their skeletons are identical.1344* See Unicode Technical Standard #39 for additional information.1345*1346* Using skeletons directly makes it possible to quickly check1347* whether an identifier is confusable with any of some large1348* set of existing identifiers, by creating an efficiently1349* searchable collection of the skeletons.1350*1351* @param sc The USpoofChecker1352* @param type Deprecated in ICU 58. You may pass any number.1353* Originally, controlled which of the Unicode confusable data1354* tables to use.1355* @param id The input identifier whose skeleton will be computed.1356* @param length The length of the input identifier, expressed in 16 bit1357* UTF-16 code units, or -1 if the string is zero terminated.1358* @param dest The output buffer, to receive the skeleton string.1359* @param destCapacity The length of the output buffer, in 16 bit units.1360* The destCapacity may be zero, in which case the function will1361* return the actual length of the skeleton.1362* @param status The error code, set if an error occurred while attempting to1363* perform the check.1364* @return The length of the skeleton string. 
The returned length1365* is always that of the complete skeleton, even when the1366* supplied buffer is too small (or of zero length)1367*1368* @stable ICU 4.21369* @see uspoof_areConfusable1370*/1371U_CAPI int32_t U_EXPORT21372uspoof_getSkeleton(const USpoofChecker *sc,1373uint32_t type,1374const UChar *id, int32_t length,1375UChar *dest, int32_t destCapacity,1376UErrorCode *status);13771378/**1379* Get the "bidiSkeleton" for an identifier and a direction.1380* Skeletons are a transformation of the input identifier;1381* Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;1382* they are RTL-confusable if their RTL bidiSkeletons are identical.1383* See Unicode Technical Standard #39 for additional information:1384* https://www.unicode.org/reports/tr39/#Confusable_Detection.1385*1386* Using skeletons directly makes it possible to quickly check1387* whether an identifier is confusable with any of some large1388* set of existing identifiers, by creating an efficiently1389* searchable collection of the skeletons.1390*1391* @param sc The USpoofChecker.1392* @param direction The context direction with which the identifier will be1393* displayed. Must be either UBIDI_LTR or UBIDI_RTL.1394* @param id The input identifier whose skeleton will be computed.1395* @param length The length of the input identifier, expressed in 16 bit1396* UTF-16 code units, or -1 if the string is zero terminated.1397* @param dest The output buffer, to receive the skeleton string.1398* @param destCapacity The length of the output buffer, in 16 bit units.1399* The destCapacity may be zero, in which case the function will1400* return the actual length of the skeleton.1401* @param status The error code, set if an error occurred while attempting to1402* perform the check.1403* @return The length of the skeleton string. 
The returned length1404* is always that of the complete skeleton, even when the1405* supplied buffer is too small (or of zero length)1406*1407* @stable ICU 741408* @see uspoof_areBidiConfusable1409*/1410U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc,1411UBiDiDirection direction,1412const UChar *id, int32_t length,1413UChar *dest, int32_t destCapacity, UErrorCode *status);14141415/**1416* Get the "skeleton" for an identifier.1417* Skeletons are a transformation of the input identifier;1418* Two identifiers are confusable if their skeletons are identical.1419* See Unicode Technical Standard #39 for additional information.1420*1421* Using skeletons directly makes it possible to quickly check1422* whether an identifier is confusable with any of some large1423* set of existing identifiers, by creating an efficiently1424* searchable collection of the skeletons.1425*1426* @param sc The USpoofChecker1427* @param type Deprecated in ICU 58. You may pass any number.1428* Originally, controlled which of the Unicode confusable data1429* tables to use.1430* @param id The UTF-8 format identifier whose skeleton will be computed.1431* @param length The length of the input string, in bytes,1432* or -1 if the string is zero terminated.1433* @param dest The output buffer, to receive the skeleton string.1434* @param destCapacity The length of the output buffer, in bytes.1435* The destCapacity may be zero, in which case the function will1436* return the actual length of the skeleton.1437* @param status The error code, set if an error occurred while attempting to1438* perform the check. Possible Errors include U_INVALID_CHAR_FOUND1439* for invalid UTF-8 sequences, and1440* U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small1441* to hold the complete skeleton.1442* @return The length of the skeleton string, in bytes. 
The returned length1443* is always that of the complete skeleton, even when the1444* supplied buffer is too small (or of zero length)1445*1446* @stable ICU 4.21447*/1448U_CAPI int32_t U_EXPORT21449uspoof_getSkeletonUTF8(const USpoofChecker *sc,1450uint32_t type,1451const char *id, int32_t length,1452char *dest, int32_t destCapacity,1453UErrorCode *status);14541455/**1456* Get the "bidiSkeleton" for an identifier and a direction.1457* Skeletons are a transformation of the input identifier;1458* Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;1459* they are RTL-confusable if their RTL bidiSkeletons are identical.1460* See Unicode Technical Standard #39 for additional information:1461* https://www.unicode.org/reports/tr39/#Confusable_Detection.1462*1463* Using skeletons directly makes it possible to quickly check1464* whether an identifier is confusable with any of some large1465* set of existing identifiers, by creating an efficiently1466* searchable collection of the skeletons.1467*1468* @param sc The USpoofChecker1469* @param direction The context direction with which the identifier will be1470* displayed. Must be either UBIDI_LTR or UBIDI_RTL.1471* @param id The UTF-8 format identifier whose skeleton will be computed.1472* @param length The length of the input string, in bytes,1473* or -1 if the string is zero terminated.1474* @param dest The output buffer, to receive the skeleton string.1475* @param destCapacity The length of the output buffer, in bytes.1476* The destCapacity may be zero, in which case the function will1477* return the actual length of the skeleton.1478* @param status The error code, set if an error occurred while attempting to1479* perform the check. Possible Errors include U_INVALID_CHAR_FOUND1480* for invalid UTF-8 sequences, and1481* U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small1482* to hold the complete skeleton.1483* @return The length of the skeleton string, in bytes. 
The returned length1484* is always that of the complete skeleton, even when the1485* supplied buffer is too small (or of zero length)1486*1487* @stable ICU 741488*/1489U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,1490const char *id, int32_t length, char *dest,1491int32_t destCapacity, UErrorCode *status);14921493/**1494* Get the set of Candidate Characters for Inclusion in Identifiers, as defined1495* in http://unicode.org/Public/security/latest/xidmodifications.txt1496* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.1497*1498* The returned set is frozen. Ownership of the set remains with the ICU library; it must not1499* be deleted by the caller.1500*1501* @param status The error code, set if a problem occurs while creating the set.1502*1503* @stable ICU 511504*/1505U_CAPI const USet * U_EXPORT21506uspoof_getInclusionSet(UErrorCode *status);15071508/**1509* Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined1510* in http://unicode.org/Public/security/latest/xidmodifications.txt1511* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.1512*1513* The returned set is frozen. 
Ownership of the set remains with the ICU library; it must not
 * be deleted by the caller.
 *
 * @param status The error code, set if a problem occurs while creating the set.
 *
 * @stable ICU 51
 */
U_CAPI const USet * U_EXPORT2
uspoof_getRecommendedSet(UErrorCode *status);

/**
 * Serialize the data for a spoof detector into a chunk of memory.
 * The flattened spoof detection tables can later be used to efficiently
 * instantiate a new Spoof Detector.
 *
 * The serialized spoof checker includes only the data compiled from the
 * Unicode data tables by uspoof_openFromSource(); it does not
 * include any other state or configuration that may have been set.
 *
 * @param sc       the Spoof Detector whose data is to be serialized.
 * @param data     a pointer to 32-bit-aligned memory to be filled with the data,
 *                 can be NULL if capacity==0
 * @param capacity the number of bytes available at data,
 *                 or 0 for preflighting
 * @param status   an in/out ICU UErrorCode; possible errors include:
 *                 - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
 *                 - U_ILLEGAL_ARGUMENT_ERROR the data or capacity parameters are bad
 * @return the number of bytes written or needed for the spoof data
 *
 * @see utrie2_openFromSerialized()
 * @stable ICU 4.2
 */
U_CAPI int32_t U_EXPORT2
uspoof_serialize(USpoofChecker *sc,
                 void *data, int32_t capacity,
                 UErrorCode *status);

U_CDECL_END

#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUSpoofCheckerPointer
 * "Smart pointer" class, closes a USpoofChecker via uspoof_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
/**
 * \cond
 * Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER.
 *       For now, suppress with a Doxygen 
cond1569*/1570U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close);1571/** \endcond */15721573/**1574* \class LocalUSpoofCheckResultPointer1575* "Smart pointer" class, closes a USpoofCheckResult via `uspoof_closeCheckResult()`.1576* For most methods see the LocalPointerBase base class.1577*1578* @see LocalPointerBase1579* @see LocalPointer1580* @stable ICU 581581*/15821583/**1584* \cond1585* Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER.1586* For now, suppress with a Doxygen cond1587*/1588U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);1589/** \endcond */15901591U_NAMESPACE_END15921593/**1594* Limit the acceptable characters to those specified by a Unicode Set.1595* Any previously specified character limit is1596* replaced by the new settings. This includes limits on1597* characters that were set with the uspoof_setAllowedLocales() function.1598*1599* The USPOOF_CHAR_LIMIT test is automatically enabled for this1600* USpoofChecker by this function.1601*1602* @param sc The USpoofChecker1603* @param chars A Unicode Set containing the list of1604* characters that are permitted. Ownership of the set1605* remains with the caller. The incoming set is cloned by1606* this function, so there are no restrictions on modifying1607* or deleting the UnicodeSet after calling this function.1608* @param status The error code, set if this function encounters a problem.1609* @stable ICU 4.21610*/1611U_CAPI void U_EXPORT21612uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);161316141615/**1616* Get a UnicodeSet for the characters permitted in an identifier.1617* This corresponds to the limits imposed by the Set Allowed Characters /1618* UnicodeSet functions. 
Limitations imposed by other checks will not be1619* reflected in the set returned by this function.1620*1621* The returned set will be frozen, meaning that it cannot be modified1622* by the caller.1623*1624* Ownership of the returned set remains with the Spoof Detector. The1625* returned set will become invalid if the spoof detector is closed,1626* or if a new set of allowed characters is specified.1627*1628*1629* @param sc The USpoofChecker1630* @param status The error code, set if this function encounters a problem.1631* @return A UnicodeSet containing the characters that are permitted by1632* the USPOOF_CHAR_LIMIT test.1633* @stable ICU 4.21634*/1635U_CAPI const icu::UnicodeSet * U_EXPORT21636uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);16371638/**1639* Check the specified string for possible security issues.1640* The text to be checked will typically be an identifier of some sort.1641* The set of checks to be performed is specified with uspoof_setChecks().1642*1643* \note1644* Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.1645* The newer API exposes additional information from the check procedure1646* and is otherwise identical to this method.1647*1648* @param sc The USpoofChecker1649* @param id An identifier to be checked for possible security issues.1650* @param position Deprecated in ICU 51. Always returns zero.1651* Originally, an out parameter for the index of the first1652* string position that failed a check.1653* This parameter may be nullptr.1654* @param status The error code, set if an error occurred while attempting to1655* perform the check.1656* Spoofing or security issues detected with the input string are1657* not reported here, but through the function's return value.1658* @return An integer value with bits set for any potential security1659* or spoofing issues detected. The bits are defined by1660* enum USpoofChecks. 
(returned_value & USPOOF_ALL_CHECKS)1661* will be zero if the input string passes all of the1662* enabled checks.1663* @see uspoof_check2UnicodeString1664* @stable ICU 4.21665*/1666U_CAPI int32_t U_EXPORT21667uspoof_checkUnicodeString(const USpoofChecker *sc,1668const icu::UnicodeString &id,1669int32_t *position,1670UErrorCode *status);16711672/**1673* Check the specified string for possible security issues.1674* The text to be checked will typically be an identifier of some sort.1675* The set of checks to be performed is specified with uspoof_setChecks().1676*1677* @param sc The USpoofChecker1678* @param id An identifier to be checked for possible security issues.1679* @param checkResult An instance of USpoofCheckResult to be filled with1680* details about the identifier. Can be nullptr.1681* @param status The error code, set if an error occurred while attempting to1682* perform the check.1683* Spoofing or security issues detected with the input string are1684* not reported here, but through the function's return value.1685* @return An integer value with bits set for any potential security1686* or spoofing issues detected. The bits are defined by1687* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)1688* will be zero if the input string passes all of the1689* enabled checks. Any information in this bitmask will be1690* consistent with the information saved in the optional1691* checkResult parameter.1692* @see uspoof_openCheckResult1693* @see uspoof_check21694* @see uspoof_check2UTF81695* @stable ICU 581696*/1697U_CAPI int32_t U_EXPORT21698uspoof_check2UnicodeString(const USpoofChecker *sc,1699const icu::UnicodeString &id,1700USpoofCheckResult* checkResult,1701UErrorCode *status);17021703/**1704* A version of {@link uspoof_areConfusable} accepting UnicodeStrings.1705*1706* @param sc The USpoofChecker1707* @param s1 The first of the two identifiers to be compared for1708* confusability. 
The strings are passed as UnicodeString objects.1709* @param s2 The second of the two identifiers to be compared for1710* confusability. The strings are passed as UnicodeString objects.1711* @param status The error code, set if an error occurred while attempting to1712* perform the check.1713* Confusability of the identifiers is not reported here,1714* but through this function's return value.1715* @return An integer value with bit(s) set corresponding to1716* the type of confusability found, as defined by1717* enum USpoofChecks. Zero is returned if the identifiers1718* are not confusable.1719*1720* @stable ICU 4.21721*1722* @see uspoof_areConfusable1723*/1724U_CAPI int32_t U_EXPORT21725uspoof_areConfusableUnicodeString(const USpoofChecker *sc,1726const icu::UnicodeString &s1,1727const icu::UnicodeString &s2,1728UErrorCode *status);17291730/**1731* A version of {@link uspoof_areBidiConfusable} accepting UnicodeStrings.1732*1733* @param sc The USpoofChecker1734* @param direction The paragraph direction with which the identifiers are1735* displayed. Must be either UBIDI_LTR or UBIDI_RTL.1736* @param s1 The first of the two identifiers to be compared for1737* confusability. The strings are passed as UnicodeString objects.1738* @param s2 The second of the two identifiers to be compared for1739* confusability. The strings are passed as UnicodeString objects.1740* @param status The error code, set if an error occurred while attempting to1741* perform the check.1742* Confusability of the identifiers is not reported here,1743* but through this function's return value.1744* @return An integer value with bit(s) set corresponding to1745* the type of confusability found, as defined by1746* enum USpoofChecks. 
Zero is returned if the identifiers1747* are not confusable.1748*1749* @stable ICU 741750*1751* @see uspoof_areBidiConfusable1752*/1753U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,1754UBiDiDirection direction,1755const icu::UnicodeString &s1,1756const icu::UnicodeString &s2,1757UErrorCode *status);17581759/**1760* Get the "skeleton" for an identifier.1761* Skeletons are a transformation of the input identifier;1762* Two identifiers are confusable if their skeletons are identical.1763* See Unicode Technical Standard #39 for additional information.1764*1765* Using skeletons directly makes it possible to quickly check1766* whether an identifier is confusable with any of some large1767* set of existing identifiers, by creating an efficiently1768* searchable collection of the skeletons.1769*1770* @param sc The USpoofChecker.1771* @param type Deprecated in ICU 58. You may pass any number.1772* Originally, controlled which of the Unicode confusable data1773* tables to use.1774* @param id The input identifier whose skeleton will be computed.1775* @param dest The output identifier, to receive the skeleton string.1776* @param status The error code, set if an error occurred while attempting to1777* perform the check.1778* @return A reference to the destination (skeleton) string.1779*1780* @stable ICU 4.21781*/1782U_I18N_API icu::UnicodeString & U_EXPORT21783uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,1784uint32_t type,1785const icu::UnicodeString &id,1786icu::UnicodeString &dest,1787UErrorCode *status);17881789/**1790* Get the "bidiSkeleton" for an identifier and a direction.1791* Skeletons are a transformation of the input identifier;1792* Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;1793* they are RTL-confusable if their RTL bidiSkeletons are identical.1794* See Unicode Technical Standard #39 for additional information.1795* 
https://www.unicode.org/reports/tr39/#Confusable_Detection.1796*1797* Using skeletons directly makes it possible to quickly check1798* whether an identifier is confusable with any of some large1799* set of existing identifiers, by creating an efficiently1800* searchable collection of the skeletons.1801*1802* @param sc The USpoofChecker.1803* @param direction The context direction with which the identifier will be1804* displayed. Must be either UBIDI_LTR or UBIDI_RTL.1805* @param id The input identifier whose bidiSkeleton will be computed.1806* @param dest The output identifier, to receive the skeleton string.1807* @param status The error code, set if an error occurred while attempting to1808* perform the check.1809* @return A reference to the destination (skeleton) string.1810*1811* @stable ICU 741812*/1813U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(1814const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id,1815icu::UnicodeString &dest, UErrorCode *status);18161817/**1818* Get the set of Candidate Characters for Inclusion in Identifiers, as defined1819* in http://unicode.org/Public/security/latest/xidmodifications.txt1820* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.1821*1822* The returned set is frozen. Ownership of the set remains with the ICU library; it must not1823* be deleted by the caller.1824*1825* @param status The error code, set if a problem occurs while creating the set.1826*1827* @stable ICU 511828*/1829U_CAPI const icu::UnicodeSet * U_EXPORT21830uspoof_getInclusionUnicodeSet(UErrorCode *status);18311832/**1833* Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined1834* in http://unicode.org/Public/security/latest/xidmodifications.txt1835* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.1836*1837* The returned set is frozen. 
Ownership of the set remains with the ICU library; it must not1838* be deleted by the caller.1839*1840* @param status The error code, set if a problem occurs while creating the set.1841*1842* @stable ICU 511843*/1844U_CAPI const icu::UnicodeSet * U_EXPORT21845uspoof_getRecommendedUnicodeSet(UErrorCode *status);18461847#endif /* U_SHOW_CPLUSPLUS_API */18481849#endif /* UCONFIG_NO_NORMALIZATION */18501851#endif /* USPOOF_H */185218531854