/*
 * Page metadata captured together with this file:
 * Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/uchar.h
 * 38827 views
 */
/*1**********************************************************************2* Copyright (C) 1997-2016, International Business Machines3* Corporation and others. All Rights Reserved.4**********************************************************************5*6* File UCHAR.H7*8* Modification History:9*10* Date Name Description11* 04/02/97 aliu Creation.12* 03/29/99 helena Updated for C APIs.13* 4/15/99 Madhu Updated for C Implementation and Javadoc14* 5/20/99 Madhu Added the function u_getVersion()15* 8/19/1999 srl Upgraded scripts to Unicode 3.016* 8/27/1999 schererm UCharDirection constants: U_...17* 11/11/1999 weiv added u_isalnum(), cleaned comments18* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().19******************************************************************************20*/2122#ifndef UCHAR_H23#define UCHAR_H2425#include "unicode/utypes.h"2627U_CDECL_BEGIN2829/*==========================================================================*/30/* Unicode version number */31/*==========================================================================*/32/**33* Unicode version number, default for the current ICU version.34* The actual Unicode Character Database (UCD) data is stored in uprops.dat35* and may be generated from UCD files from a different Unicode version.36* Call u_getUnicodeVersion to get the actual Unicode version of the data.37*38* @see u_getUnicodeVersion39* @stable ICU 2.040*/41#define U_UNICODE_VERSION "8.0"4243/**44* \file45* \brief C API: Unicode Properties46*47* This C API provides low-level access to the Unicode Character Database.48* In addition to raw property values, some convenience functions calculate49* derived properties, for example for Java-style programming.50*51* Unicode assigns each code point (not just assigned character) values for52* many properties.53* Most of them are simple boolean flags, or constants from a small enumerated list.54* For some properties, values are strings or other relatively more complex 
types.55*56* For more information see57* "About the Unicode Character Database" (http://www.unicode.org/ucd/)58* and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).59*60* Many functions are designed to match java.lang.Character functions.61* See the individual function documentation,62* and see the JDK 1.4 java.lang.Character documentation63* at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html64*65* There are also functions that provide easy migration from C/POSIX functions66* like isblank(). Their use is generally discouraged because the C/POSIX67* standards do not define their semantics beyond the ASCII range, which means68* that different implementations exhibit very different behavior.69* Instead, Unicode properties should be used directly.70*71* There are also only a few, broad C/POSIX character classes, and they tend72* to be used for conflicting purposes. For example, the "isalpha()" class73* is sometimes used to determine word boundaries, while a more sophisticated74* approach would at least distinguish initial letters from continuation75* characters (the latter including combining marks).76* (In ICU, BreakIterator is the most sophisticated API for word boundaries.)77* Another example: There is no "istitle()" class for titlecase characters.78*79* ICU 3.4 and later provides API access for all twelve C/POSIX character classes.80* ICU implements them according to the Standard Recommendations in81* Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions82* (http://www.unicode.org/reports/tr18/#Compatibility_Properties).83*84* API access for C/POSIX character classes is as follows:85* - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)86* - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)87* - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)88* - punct: u_ispunct(c)89* - digit: u_isdigit(c) or 
u_charType(c)==U_DECIMAL_DIGIT_NUMBER90* - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)91* - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)92* - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)93* - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)94* - cntrl: u_charType(c)==U_CONTROL_CHAR95* - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)96* - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)97*98* Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,99* the Standard Recommendations in UTS #18. Instead, they match Java100* functions according to their API documentation.101*102* \htmlonly103* The C/POSIX character classes are also available in UnicodeSet patterns,104* using patterns like [:graph:] or \p{graph}.105* \endhtmlonly106*107* Note: There are several ICU whitespace functions.108* Comparison:109* - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;110* most of general categories "Z" (separators) + most whitespace ISO controls111* (including no-break spaces, but excluding IS1..IS4 and ZWSP)112* - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces113* - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)114* - u_isspace: Z + whitespace ISO controls (including no-break spaces)115* - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP116*/117118/**119* Constants.120*/121122/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */123#define UCHAR_MIN_VALUE 0124125/**126* The highest Unicode code point value (scalar value) according to127* The Unicode Standard. 
This is a 21-bit value (20.1 bits, rounded up).128* For a single character, UChar32 is a simple type that can hold any code point value.129*130* @see UChar32131* @stable ICU 2.0132*/133#define UCHAR_MAX_VALUE 0x10ffff134135/**136* Get a single-bit bit set (a flag) from a bit number 0..31.137* @stable ICU 2.1138*/139#define U_MASK(x) ((uint32_t)1<<(x))140141/**142* Selection constants for Unicode properties.143* These constants are used in functions like u_hasBinaryProperty to select144* one of the Unicode properties.145*146* The properties APIs are intended to reflect Unicode properties as defined147* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).148* For details about the properties see http://www.unicode.org/ucd/ .149* For names of Unicode properties see the UCD file PropertyAliases.txt.150*151* Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,152* then properties marked with "new in Unicode 3.2" are not or not fully available.153* Check u_getUnicodeVersion to be sure.154*155* @see u_hasBinaryProperty156* @see u_getIntPropertyValue157* @see u_getUnicodeVersion158* @stable ICU 2.1159*/160typedef enum UProperty {161/*162* Note: UProperty constants are parsed by preparseucd.py.163* It matches lines like164* UCHAR_<Unicode property name>=<integer>,165*/166167/* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that168debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,169rather than UCHAR_BINARY_START. Likewise for other *_START170identifiers. */171172/** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.173Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */174UCHAR_ALPHABETIC=0,175/** First constant for binary Unicode properties. @stable ICU 2.1 */176UCHAR_BINARY_START=UCHAR_ALPHABETIC,177/** Binary property ASCII_Hex_Digit. 
0-9 A-F a-f @stable ICU 2.1 */178UCHAR_ASCII_HEX_DIGIT=1,179/** Binary property Bidi_Control.180Format controls which have specific functions181in the Bidi Algorithm. @stable ICU 2.1 */182UCHAR_BIDI_CONTROL=2,183/** Binary property Bidi_Mirrored.184Characters that may change display in RTL text.185Same as u_isMirrored.186See Bidi Algorithm, UTR 9. @stable ICU 2.1 */187UCHAR_BIDI_MIRRORED=3,188/** Binary property Dash. Variations of dashes. @stable ICU 2.1 */189UCHAR_DASH=4,190/** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).191Ignorable in most processing.192<2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */193UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,194/** Binary property Deprecated (new in Unicode 3.2).195The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */196UCHAR_DEPRECATED=6,197/** Binary property Diacritic. Characters that linguistically modify198the meaning of another character to which they apply. @stable ICU 2.1 */199UCHAR_DIACRITIC=7,200/** Binary property Extender.201Extend the value or shape of a preceding alphabetic character,202e.g., length and iteration marks. @stable ICU 2.1 */203UCHAR_EXTENDER=8,204/** Binary property Full_Composition_Exclusion.205CompositionExclusions.txt+Singleton Decompositions+206Non-Starter Decompositions. 
@stable ICU 2.1 */207UCHAR_FULL_COMPOSITION_EXCLUSION=9,208/** Binary property Grapheme_Base (new in Unicode 3.2).209For programmatic determination of grapheme cluster boundaries.210[0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */211UCHAR_GRAPHEME_BASE=10,212/** Binary property Grapheme_Extend (new in Unicode 3.2).213For programmatic determination of grapheme cluster boundaries.214Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */215UCHAR_GRAPHEME_EXTEND=11,216/** Binary property Grapheme_Link (new in Unicode 3.2).217For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */218UCHAR_GRAPHEME_LINK=12,219/** Binary property Hex_Digit.220Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */221UCHAR_HEX_DIGIT=13,222/** Binary property Hyphen. Dashes used to mark connections223between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */224UCHAR_HYPHEN=14,225/** Binary property ID_Continue.226Characters that can continue an identifier.227DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."228ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */229UCHAR_ID_CONTINUE=15,230/** Binary property ID_Start.231Characters that can start an identifier.232Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */233UCHAR_ID_START=16,234/** Binary property Ideographic.235CJKV ideographs. @stable ICU 2.1 */236UCHAR_IDEOGRAPHIC=17,237/** Binary property IDS_Binary_Operator (new in Unicode 3.2).238For programmatic determination of239Ideographic Description Sequences. @stable ICU 2.1 */240UCHAR_IDS_BINARY_OPERATOR=18,241/** Binary property IDS_Trinary_Operator (new in Unicode 3.2).242For programmatic determination of243Ideographic Description Sequences. @stable ICU 2.1 */244UCHAR_IDS_TRINARY_OPERATOR=19,245/** Binary property Join_Control.246Format controls for cursive joining and ligation. 
@stable ICU 2.1 */247UCHAR_JOIN_CONTROL=20,248/** Binary property Logical_Order_Exception (new in Unicode 3.2).249Characters that do not use logical order and250require special handling in most processing. @stable ICU 2.1 */251UCHAR_LOGICAL_ORDER_EXCEPTION=21,252/** Binary property Lowercase. Same as u_isULowercase, different from u_islower.253Ll+Other_Lowercase @stable ICU 2.1 */254UCHAR_LOWERCASE=22,255/** Binary property Math. Sm+Other_Math @stable ICU 2.1 */256UCHAR_MATH=23,257/** Binary property Noncharacter_Code_Point.258Code points that are explicitly defined as illegal259for the encoding of characters. @stable ICU 2.1 */260UCHAR_NONCHARACTER_CODE_POINT=24,261/** Binary property Quotation_Mark. @stable ICU 2.1 */262UCHAR_QUOTATION_MARK=25,263/** Binary property Radical (new in Unicode 3.2).264For programmatic determination of265Ideographic Description Sequences. @stable ICU 2.1 */266UCHAR_RADICAL=26,267/** Binary property Soft_Dotted (new in Unicode 3.2).268Characters with a "soft dot", like i or j.269An accent placed on these characters causes270the dot to disappear. @stable ICU 2.1 */271UCHAR_SOFT_DOTTED=27,272/** Binary property Terminal_Punctuation.273Punctuation characters that generally mark274the end of textual units. @stable ICU 2.1 */275UCHAR_TERMINAL_PUNCTUATION=28,276/** Binary property Unified_Ideograph (new in Unicode 3.2).277For programmatic determination of278Ideographic Description Sequences. @stable ICU 2.1 */279UCHAR_UNIFIED_IDEOGRAPH=29,280/** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.281Lu+Other_Uppercase @stable ICU 2.1 */282UCHAR_UPPERCASE=30,283/** Binary property White_Space.284Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.285Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */286UCHAR_WHITE_SPACE=31,287/** Binary property XID_Continue.288ID_Continue modified to allow closure under289normalization forms NFKC and NFKD. 
@stable ICU 2.1 */290UCHAR_XID_CONTINUE=32,291/** Binary property XID_Start. ID_Start modified to allow292closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */293UCHAR_XID_START=33,294/** Binary property Case_Sensitive. Either the source of a case295mapping or _in_ the target of a case mapping. Not the same as296the general category Cased_Letter. @stable ICU 2.6 */297UCHAR_CASE_SENSITIVE=34,298/** Binary property STerm (new in Unicode 4.0.1).299Sentence Terminal. Used in UAX #29: Text Boundaries300(http://www.unicode.org/reports/tr29/)301@stable ICU 3.0 */302UCHAR_S_TERM=35,303/** Binary property Variation_Selector (new in Unicode 4.0.1).304Indicates all those characters that qualify as Variation Selectors.305For details on the behavior of these characters,306see StandardizedVariants.html and 15.6 Variation Selectors.307@stable ICU 3.0 */308UCHAR_VARIATION_SELECTOR=36,309/** Binary property NFD_Inert.310ICU-specific property for characters that are inert under NFD,311i.e., they do not interact with adjacent characters.312See the documentation for the Normalizer2 class and the313Normalizer2::isInert() method.314@stable ICU 3.0 */315UCHAR_NFD_INERT=37,316/** Binary property NFKD_Inert.317ICU-specific property for characters that are inert under NFKD,318i.e., they do not interact with adjacent characters.319See the documentation for the Normalizer2 class and the320Normalizer2::isInert() method.321@stable ICU 3.0 */322UCHAR_NFKD_INERT=38,323/** Binary property NFC_Inert.324ICU-specific property for characters that are inert under NFC,325i.e., they do not interact with adjacent characters.326See the documentation for the Normalizer2 class and the327Normalizer2::isInert() method.328@stable ICU 3.0 */329UCHAR_NFC_INERT=39,330/** Binary property NFKC_Inert.331ICU-specific property for characters that are inert under NFKC,332i.e., they do not interact with adjacent characters.333See the documentation for the Normalizer2 class and the334Normalizer2::isInert() 
method.335@stable ICU 3.0 */336UCHAR_NFKC_INERT=40,337/** Binary Property Segment_Starter.338ICU-specific property for characters that are starters in terms of339Unicode normalization and combining character sequences.340They have ccc=0 and do not occur in non-initial position of the341canonical decomposition of any character342(like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).343ICU uses this property for segmenting a string for generating a set of344canonically equivalent strings, e.g. for canonical closure while345processing collation tailoring rules.346@stable ICU 3.0 */347UCHAR_SEGMENT_STARTER=41,348/** Binary property Pattern_Syntax (new in Unicode 4.1).349See UAX #31 Identifier and Pattern Syntax350(http://www.unicode.org/reports/tr31/)351@stable ICU 3.4 */352UCHAR_PATTERN_SYNTAX=42,353/** Binary property Pattern_White_Space (new in Unicode 4.1).354See UAX #31 Identifier and Pattern Syntax355(http://www.unicode.org/reports/tr31/)356@stable ICU 3.4 */357UCHAR_PATTERN_WHITE_SPACE=43,358/** Binary property alnum (a C/POSIX character class).359Implemented according to the UTS #18 Annex C Standard Recommendation.360See the uchar.h file documentation.361@stable ICU 3.4 */362UCHAR_POSIX_ALNUM=44,363/** Binary property blank (a C/POSIX character class).364Implemented according to the UTS #18 Annex C Standard Recommendation.365See the uchar.h file documentation.366@stable ICU 3.4 */367UCHAR_POSIX_BLANK=45,368/** Binary property graph (a C/POSIX character class).369Implemented according to the UTS #18 Annex C Standard Recommendation.370See the uchar.h file documentation.371@stable ICU 3.4 */372UCHAR_POSIX_GRAPH=46,373/** Binary property print (a C/POSIX character class).374Implemented according to the UTS #18 Annex C Standard Recommendation.375See the uchar.h file documentation.376@stable ICU 3.4 */377UCHAR_POSIX_PRINT=47,378/** Binary property xdigit (a C/POSIX character class).379Implemented according to the UTS #18 Annex C Standard Recommendation.380See the 
uchar.h file documentation.381@stable ICU 3.4 */382UCHAR_POSIX_XDIGIT=48,383/** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */384UCHAR_CASED=49,385/** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */386UCHAR_CASE_IGNORABLE=50,387/** Binary property Changes_When_Lowercased. @stable ICU 4.4 */388UCHAR_CHANGES_WHEN_LOWERCASED=51,389/** Binary property Changes_When_Uppercased. @stable ICU 4.4 */390UCHAR_CHANGES_WHEN_UPPERCASED=52,391/** Binary property Changes_When_Titlecased. @stable ICU 4.4 */392UCHAR_CHANGES_WHEN_TITLECASED=53,393/** Binary property Changes_When_Casefolded. @stable ICU 4.4 */394UCHAR_CHANGES_WHEN_CASEFOLDED=54,395/** Binary property Changes_When_Casemapped. @stable ICU 4.4 */396UCHAR_CHANGES_WHEN_CASEMAPPED=55,397/** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */398UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,399#ifndef U_HIDE_DRAFT_API400/**401* Binary property Emoji.402* See http://www.unicode.org/reports/tr51/#Emoji_Properties403*404* @draft ICU 57405*/406UCHAR_EMOJI=57,407/**408* Binary property Emoji_Presentation.409* See http://www.unicode.org/reports/tr51/#Emoji_Properties410*411* @draft ICU 57412*/413UCHAR_EMOJI_PRESENTATION=58,414/**415* Binary property Emoji_Modifier.416* See http://www.unicode.org/reports/tr51/#Emoji_Properties417*418* @draft ICU 57419*/420UCHAR_EMOJI_MODIFIER=59,421/**422* Binary property Emoji_Modifier_Base.423* See http://www.unicode.org/reports/tr51/#Emoji_Properties424*425* @draft ICU 57426*/427UCHAR_EMOJI_MODIFIER_BASE=60,428#endif /* U_HIDE_DRAFT_API */429/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */430UCHAR_BINARY_LIMIT=61,431432/** Enumerated property Bidi_Class.433Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */434UCHAR_BIDI_CLASS=0x1000,435/** First constant for enumerated/integer Unicode properties. 
@stable ICU 2.2 */436UCHAR_INT_START=UCHAR_BIDI_CLASS,437/** Enumerated property Block.438Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */439UCHAR_BLOCK=0x1001,440/** Enumerated property Canonical_Combining_Class.441Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */442UCHAR_CANONICAL_COMBINING_CLASS=0x1002,443/** Enumerated property Decomposition_Type.444Returns UDecompositionType values. @stable ICU 2.2 */445UCHAR_DECOMPOSITION_TYPE=0x1003,446/** Enumerated property East_Asian_Width.447See http://www.unicode.org/reports/tr11/448Returns UEastAsianWidth values. @stable ICU 2.2 */449UCHAR_EAST_ASIAN_WIDTH=0x1004,450/** Enumerated property General_Category.451Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */452UCHAR_GENERAL_CATEGORY=0x1005,453/** Enumerated property Joining_Group.454Returns UJoiningGroup values. @stable ICU 2.2 */455UCHAR_JOINING_GROUP=0x1006,456/** Enumerated property Joining_Type.457Returns UJoiningType values. @stable ICU 2.2 */458UCHAR_JOINING_TYPE=0x1007,459/** Enumerated property Line_Break.460Returns ULineBreak values. @stable ICU 2.2 */461UCHAR_LINE_BREAK=0x1008,462/** Enumerated property Numeric_Type.463Returns UNumericType values. @stable ICU 2.2 */464UCHAR_NUMERIC_TYPE=0x1009,465/** Enumerated property Script.466Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */467UCHAR_SCRIPT=0x100A,468/** Enumerated property Hangul_Syllable_Type, new in Unicode 4.469Returns UHangulSyllableType values. @stable ICU 2.6 */470UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,471/** Enumerated property NFD_Quick_Check.472Returns UNormalizationCheckResult values. @stable ICU 3.0 */473UCHAR_NFD_QUICK_CHECK=0x100C,474/** Enumerated property NFKD_Quick_Check.475Returns UNormalizationCheckResult values. @stable ICU 3.0 */476UCHAR_NFKD_QUICK_CHECK=0x100D,477/** Enumerated property NFC_Quick_Check.478Returns UNormalizationCheckResult values. 
@stable ICU 3.0 */479UCHAR_NFC_QUICK_CHECK=0x100E,480/** Enumerated property NFKC_Quick_Check.481Returns UNormalizationCheckResult values. @stable ICU 3.0 */482UCHAR_NFKC_QUICK_CHECK=0x100F,483/** Enumerated property Lead_Canonical_Combining_Class.484ICU-specific property for the ccc of the first code point485of the decomposition, or lccc(c)=ccc(NFD(c)[0]).486Useful for checking for canonically ordered text;487see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .488Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */489UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,490/** Enumerated property Trail_Canonical_Combining_Class.491ICU-specific property for the ccc of the last code point492of the decomposition, or tccc(c)=ccc(NFD(c)[last]).493Useful for checking for canonically ordered text;494see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .495Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */496UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,497/** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).498Used in UAX #29: Text Boundaries499(http://www.unicode.org/reports/tr29/)500Returns UGraphemeClusterBreak values. @stable ICU 3.4 */501UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,502/** Enumerated property Sentence_Break (new in Unicode 4.1).503Used in UAX #29: Text Boundaries504(http://www.unicode.org/reports/tr29/)505Returns USentenceBreak values. @stable ICU 3.4 */506UCHAR_SENTENCE_BREAK=0x1013,507/** Enumerated property Word_Break (new in Unicode 4.1).508Used in UAX #29: Text Boundaries509(http://www.unicode.org/reports/tr29/)510Returns UWordBreakValues values. @stable ICU 3.4 */511UCHAR_WORD_BREAK=0x1014,512/** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).513Used in UAX #9: Unicode Bidirectional Algorithm514(http://www.unicode.org/reports/tr9/)515Returns UBidiPairedBracketType values. 
@stable ICU 52 */516UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,517/** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */518UCHAR_INT_LIMIT=0x1016,519520/** Bitmask property General_Category_Mask.521This is the General_Category property returned as a bit mask.522When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),523returns bit masks for UCharCategory values where exactly one bit is set.524When used with u_getPropertyValueName() and u_getPropertyValueEnum(),525a multi-bit mask is used for sets of categories like "Letters".526Mask values should be cast to uint32_t.527@stable ICU 2.4 */528UCHAR_GENERAL_CATEGORY_MASK=0x2000,529/** First constant for bit-mask Unicode properties. @stable ICU 2.4 */530UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,531/** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */532UCHAR_MASK_LIMIT=0x2001,533534/** Double property Numeric_Value.535Corresponds to u_getNumericValue. @stable ICU 2.4 */536UCHAR_NUMERIC_VALUE=0x3000,537/** First constant for double Unicode properties. @stable ICU 2.4 */538UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,539/** One more than the last constant for double Unicode properties. @stable ICU 2.4 */540UCHAR_DOUBLE_LIMIT=0x3001,541542/** String property Age.543Corresponds to u_charAge. @stable ICU 2.4 */544UCHAR_AGE=0x4000,545/** First constant for string Unicode properties. @stable ICU 2.4 */546UCHAR_STRING_START=UCHAR_AGE,547/** String property Bidi_Mirroring_Glyph.548Corresponds to u_charMirror. @stable ICU 2.4 */549UCHAR_BIDI_MIRRORING_GLYPH=0x4001,550/** String property Case_Folding.551Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */552UCHAR_CASE_FOLDING=0x4002,553#ifndef U_HIDE_DEPRECATED_API554/** Deprecated string property ISO_Comment.555Corresponds to u_getISOComment. 
@deprecated ICU 49 */556UCHAR_ISO_COMMENT=0x4003,557#endif /* U_HIDE_DEPRECATED_API */558/** String property Lowercase_Mapping.559Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */560UCHAR_LOWERCASE_MAPPING=0x4004,561/** String property Name.562Corresponds to u_charName. @stable ICU 2.4 */563UCHAR_NAME=0x4005,564/** String property Simple_Case_Folding.565Corresponds to u_foldCase. @stable ICU 2.4 */566UCHAR_SIMPLE_CASE_FOLDING=0x4006,567/** String property Simple_Lowercase_Mapping.568Corresponds to u_tolower. @stable ICU 2.4 */569UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,570/** String property Simple_Titlecase_Mapping.571Corresponds to u_totitle. @stable ICU 2.4 */572UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,573/** String property Simple_Uppercase_Mapping.574Corresponds to u_toupper. @stable ICU 2.4 */575UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,576/** String property Titlecase_Mapping.577Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */578UCHAR_TITLECASE_MAPPING=0x400A,579#ifndef U_HIDE_DEPRECATED_API580/** String property Unicode_1_Name.581This property is of little practical value.582Beginning with ICU 49, ICU APIs return an empty string for this property.583Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */584UCHAR_UNICODE_1_NAME=0x400B,585#endif /* U_HIDE_DEPRECATED_API */586/** String property Uppercase_Mapping.587Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */588UCHAR_UPPERCASE_MAPPING=0x400C,589/** String property Bidi_Paired_Bracket (new in Unicode 6.3).590Corresponds to u_getBidiPairedBracket. @stable ICU 52 */591UCHAR_BIDI_PAIRED_BRACKET=0x400D,592/** One more than the last constant for string Unicode properties. 
@stable ICU 2.4 */593UCHAR_STRING_LIMIT=0x400E,594595/** Miscellaneous property Script_Extensions (new in Unicode 6.0).596Some characters are commonly used in multiple scripts.597For more information, see UAX #24: http://www.unicode.org/reports/tr24/.598Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.599@stable ICU 4.6 */600UCHAR_SCRIPT_EXTENSIONS=0x7000,601/** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */602UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,603/** One more than the last constant for Unicode properties with unusual value types.604* @stable ICU 4.6 */605UCHAR_OTHER_PROPERTY_LIMIT=0x7001,606/** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */607UCHAR_INVALID_CODE = -1608} UProperty;609610/**611* Data for enumerated Unicode general category types.612* See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .613* @stable ICU 2.0614*/615typedef enum UCharCategory616{617/*618* Note: UCharCategory constants and their API comments are parsed by preparseucd.py.619* It matches pairs of lines like620* / ** <Unicode 2-letter General_Category value> comment... * /621* U_<[A-Z_]+> = <integer>,622*/623624/** Non-category for unassigned and non-character code points. @stable ICU 2.0 */625U_UNASSIGNED = 0,626/** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) 
@stable ICU 2.0 */627U_GENERAL_OTHER_TYPES = 0,628/** Lu @stable ICU 2.0 */629U_UPPERCASE_LETTER = 1,630/** Ll @stable ICU 2.0 */631U_LOWERCASE_LETTER = 2,632/** Lt @stable ICU 2.0 */633U_TITLECASE_LETTER = 3,634/** Lm @stable ICU 2.0 */635U_MODIFIER_LETTER = 4,636/** Lo @stable ICU 2.0 */637U_OTHER_LETTER = 5,638/** Mn @stable ICU 2.0 */639U_NON_SPACING_MARK = 6,640/** Me @stable ICU 2.0 */641U_ENCLOSING_MARK = 7,642/** Mc @stable ICU 2.0 */643U_COMBINING_SPACING_MARK = 8,644/** Nd @stable ICU 2.0 */645U_DECIMAL_DIGIT_NUMBER = 9,646/** Nl @stable ICU 2.0 */647U_LETTER_NUMBER = 10,648/** No @stable ICU 2.0 */649U_OTHER_NUMBER = 11,650/** Zs @stable ICU 2.0 */651U_SPACE_SEPARATOR = 12,652/** Zl @stable ICU 2.0 */653U_LINE_SEPARATOR = 13,654/** Zp @stable ICU 2.0 */655U_PARAGRAPH_SEPARATOR = 14,656/** Cc @stable ICU 2.0 */657U_CONTROL_CHAR = 15,658/** Cf @stable ICU 2.0 */659U_FORMAT_CHAR = 16,660/** Co @stable ICU 2.0 */661U_PRIVATE_USE_CHAR = 17,662/** Cs @stable ICU 2.0 */663U_SURROGATE = 18,664/** Pd @stable ICU 2.0 */665U_DASH_PUNCTUATION = 19,666/** Ps @stable ICU 2.0 */667U_START_PUNCTUATION = 20,668/** Pe @stable ICU 2.0 */669U_END_PUNCTUATION = 21,670/** Pc @stable ICU 2.0 */671U_CONNECTOR_PUNCTUATION = 22,672/** Po @stable ICU 2.0 */673U_OTHER_PUNCTUATION = 23,674/** Sm @stable ICU 2.0 */675U_MATH_SYMBOL = 24,676/** Sc @stable ICU 2.0 */677U_CURRENCY_SYMBOL = 25,678/** Sk @stable ICU 2.0 */679U_MODIFIER_SYMBOL = 26,680/** So @stable ICU 2.0 */681U_OTHER_SYMBOL = 27,682/** Pi @stable ICU 2.0 */683U_INITIAL_PUNCTUATION = 28,684/** Pf @stable ICU 2.0 */685U_FINAL_PUNCTUATION = 29,686/** One higher than the last enum UCharCategory constant. 
@stable ICU 2.0 */687U_CHAR_CATEGORY_COUNT688} UCharCategory;689690/**691* U_GC_XX_MASK constants are bit flags corresponding to Unicode692* general category values.693* For each category, the nth bit is set if the numeric value of the694* corresponding UCharCategory constant is n.695*696* There are also some U_GC_Y_MASK constants for groups of general categories697* like L for all letter categories.698*699* @see u_charType700* @see U_GET_GC_MASK701* @see UCharCategory702* @stable ICU 2.1703*/704#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)705706/** Mask constant for a UCharCategory. @stable ICU 2.1 */707#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)708/** Mask constant for a UCharCategory. @stable ICU 2.1 */709#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)710/** Mask constant for a UCharCategory. @stable ICU 2.1 */711#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)712/** Mask constant for a UCharCategory. @stable ICU 2.1 */713#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)714/** Mask constant for a UCharCategory. @stable ICU 2.1 */715#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)716717/** Mask constant for a UCharCategory. @stable ICU 2.1 */718#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)719/** Mask constant for a UCharCategory. @stable ICU 2.1 */720#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)721/** Mask constant for a UCharCategory. @stable ICU 2.1 */722#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)723724/** Mask constant for a UCharCategory. @stable ICU 2.1 */725#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)726/** Mask constant for a UCharCategory. @stable ICU 2.1 */727#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)728/** Mask constant for a UCharCategory. @stable ICU 2.1 */729#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)730731/** Mask constant for a UCharCategory. @stable ICU 2.1 */732#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)733/** Mask constant for a UCharCategory. 
@stable ICU 2.1 */734#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)735/** Mask constant for a UCharCategory. @stable ICU 2.1 */736#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)737738/** Mask constant for a UCharCategory. @stable ICU 2.1 */739#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)740/** Mask constant for a UCharCategory. @stable ICU 2.1 */741#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)742/** Mask constant for a UCharCategory. @stable ICU 2.1 */743#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)744/** Mask constant for a UCharCategory. @stable ICU 2.1 */745#define U_GC_CS_MASK U_MASK(U_SURROGATE)746747/** Mask constant for a UCharCategory. @stable ICU 2.1 */748#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)749/** Mask constant for a UCharCategory. @stable ICU 2.1 */750#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)751/** Mask constant for a UCharCategory. @stable ICU 2.1 */752#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)753/** Mask constant for a UCharCategory. @stable ICU 2.1 */754#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)755/** Mask constant for a UCharCategory. @stable ICU 2.1 */756#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)757758/** Mask constant for a UCharCategory. @stable ICU 2.1 */759#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)760/** Mask constant for a UCharCategory. @stable ICU 2.1 */761#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)762/** Mask constant for a UCharCategory. @stable ICU 2.1 */763#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)764/** Mask constant for a UCharCategory. @stable ICU 2.1 */765#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)766767/** Mask constant for a UCharCategory. @stable ICU 2.1 */768#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)769/** Mask constant for a UCharCategory. @stable ICU 2.1 */770#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)771772773/** Mask constant for multiple UCharCategory bits (L Letters). 
@stable ICU 2.1 */774#define U_GC_L_MASK \775(U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)776777/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */778#define U_GC_LC_MASK \779(U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)780781/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */782#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)783784/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */785#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)786787/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */788#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)789790/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */791#define U_GC_C_MASK \792(U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)793794/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */795#define U_GC_P_MASK \796(U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \797U_GC_PI_MASK|U_GC_PF_MASK)798799/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */800#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)801802/**803* This specifies the language directional property of a character set.804* @stable ICU 2.0805*/806typedef enum UCharDirection {807/*808* Note: UCharDirection constants and their API comments are parsed by preparseucd.py.809* It matches pairs of lines like810* / ** <Unicode 1..3-letter Bidi_Class value> comment... 
* /811* U_<[A-Z_]+> = <integer>,812*/813814/** L @stable ICU 2.0 */815U_LEFT_TO_RIGHT = 0,816/** R @stable ICU 2.0 */817U_RIGHT_TO_LEFT = 1,818/** EN @stable ICU 2.0 */819U_EUROPEAN_NUMBER = 2,820/** ES @stable ICU 2.0 */821U_EUROPEAN_NUMBER_SEPARATOR = 3,822/** ET @stable ICU 2.0 */823U_EUROPEAN_NUMBER_TERMINATOR = 4,824/** AN @stable ICU 2.0 */825U_ARABIC_NUMBER = 5,826/** CS @stable ICU 2.0 */827U_COMMON_NUMBER_SEPARATOR = 6,828/** B @stable ICU 2.0 */829U_BLOCK_SEPARATOR = 7,830/** S @stable ICU 2.0 */831U_SEGMENT_SEPARATOR = 8,832/** WS @stable ICU 2.0 */833U_WHITE_SPACE_NEUTRAL = 9,834/** ON @stable ICU 2.0 */835U_OTHER_NEUTRAL = 10,836/** LRE @stable ICU 2.0 */837U_LEFT_TO_RIGHT_EMBEDDING = 11,838/** LRO @stable ICU 2.0 */839U_LEFT_TO_RIGHT_OVERRIDE = 12,840/** AL @stable ICU 2.0 */841U_RIGHT_TO_LEFT_ARABIC = 13,842/** RLE @stable ICU 2.0 */843U_RIGHT_TO_LEFT_EMBEDDING = 14,844/** RLO @stable ICU 2.0 */845U_RIGHT_TO_LEFT_OVERRIDE = 15,846/** PDF @stable ICU 2.0 */847U_POP_DIRECTIONAL_FORMAT = 16,848/** NSM @stable ICU 2.0 */849U_DIR_NON_SPACING_MARK = 17,850/** BN @stable ICU 2.0 */851U_BOUNDARY_NEUTRAL = 18,852/** FSI @stable ICU 52 */853U_FIRST_STRONG_ISOLATE = 19,854/** LRI @stable ICU 52 */855U_LEFT_TO_RIGHT_ISOLATE = 20,856/** RLI @stable ICU 52 */857U_RIGHT_TO_LEFT_ISOLATE = 21,858/** PDI @stable ICU 52 */859U_POP_DIRECTIONAL_ISOLATE = 22,860/** @stable ICU 2.0 */861U_CHAR_DIRECTION_COUNT862} UCharDirection;863864/**865* Bidi Paired Bracket Type constants.866*867* @see UCHAR_BIDI_PAIRED_BRACKET_TYPE868* @stable ICU 52869*/870typedef enum UBidiPairedBracketType {871/*872* Note: UBidiPairedBracketType constants are parsed by preparseucd.py.873* It matches lines like874* U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>875*/876877/** Not a paired bracket. @stable ICU 52 */878U_BPT_NONE,879/** Open paired bracket. @stable ICU 52 */880U_BPT_OPEN,881/** Close paired bracket. 
@stable ICU 52 */882U_BPT_CLOSE,883/** @stable ICU 52 */884U_BPT_COUNT /* 3 */885} UBidiPairedBracketType;886887/**888* Constants for Unicode blocks, see the Unicode Data file Blocks.txt889* @stable ICU 2.0890*/891enum UBlockCode {892/*893* Note: UBlockCode constants are parsed by preparseucd.py.894* It matches lines like895* UBLOCK_<Unicode Block value name> = <integer>,896*/897898/** New No_Block value in Unicode 4. @stable ICU 2.6 */899UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */900901/** @stable ICU 2.0 */902UBLOCK_BASIC_LATIN = 1, /*[0000]*/903904/** @stable ICU 2.0 */905UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/906907/** @stable ICU 2.0 */908UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/909910/** @stable ICU 2.0 */911UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/912913/** @stable ICU 2.0 */914UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/915916/** @stable ICU 2.0 */917UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/918919/** @stable ICU 2.0 */920UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/921922/**923* Unicode 3.2 renames this block to "Greek and Coptic".924* @stable ICU 2.0925*/926UBLOCK_GREEK =8, /*[0370]*/927928/** @stable ICU 2.0 */929UBLOCK_CYRILLIC =9, /*[0400]*/930931/** @stable ICU 2.0 */932UBLOCK_ARMENIAN =10, /*[0530]*/933934/** @stable ICU 2.0 */935UBLOCK_HEBREW =11, /*[0590]*/936937/** @stable ICU 2.0 */938UBLOCK_ARABIC =12, /*[0600]*/939940/** @stable ICU 2.0 */941UBLOCK_SYRIAC =13, /*[0700]*/942943/** @stable ICU 2.0 */944UBLOCK_THAANA =14, /*[0780]*/945946/** @stable ICU 2.0 */947UBLOCK_DEVANAGARI =15, /*[0900]*/948949/** @stable ICU 2.0 */950UBLOCK_BENGALI =16, /*[0980]*/951952/** @stable ICU 2.0 */953UBLOCK_GURMUKHI =17, /*[0A00]*/954955/** @stable ICU 2.0 */956UBLOCK_GUJARATI =18, /*[0A80]*/957958/** @stable ICU 2.0 */959UBLOCK_ORIYA =19, /*[0B00]*/960961/** @stable ICU 2.0 */962UBLOCK_TAMIL =20, /*[0B80]*/963964/** @stable ICU 2.0 */965UBLOCK_TELUGU =21, /*[0C00]*/966967/** @stable ICU 2.0 */968UBLOCK_KANNADA =22, /*[0C80]*/969970/** 
@stable ICU 2.0 */971UBLOCK_MALAYALAM =23, /*[0D00]*/972973/** @stable ICU 2.0 */974UBLOCK_SINHALA =24, /*[0D80]*/975976/** @stable ICU 2.0 */977UBLOCK_THAI =25, /*[0E00]*/978979/** @stable ICU 2.0 */980UBLOCK_LAO =26, /*[0E80]*/981982/** @stable ICU 2.0 */983UBLOCK_TIBETAN =27, /*[0F00]*/984985/** @stable ICU 2.0 */986UBLOCK_MYANMAR =28, /*[1000]*/987988/** @stable ICU 2.0 */989UBLOCK_GEORGIAN =29, /*[10A0]*/990991/** @stable ICU 2.0 */992UBLOCK_HANGUL_JAMO =30, /*[1100]*/993994/** @stable ICU 2.0 */995UBLOCK_ETHIOPIC =31, /*[1200]*/996997/** @stable ICU 2.0 */998UBLOCK_CHEROKEE =32, /*[13A0]*/9991000/** @stable ICU 2.0 */1001UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/10021003/** @stable ICU 2.0 */1004UBLOCK_OGHAM =34, /*[1680]*/10051006/** @stable ICU 2.0 */1007UBLOCK_RUNIC =35, /*[16A0]*/10081009/** @stable ICU 2.0 */1010UBLOCK_KHMER =36, /*[1780]*/10111012/** @stable ICU 2.0 */1013UBLOCK_MONGOLIAN =37, /*[1800]*/10141015/** @stable ICU 2.0 */1016UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/10171018/** @stable ICU 2.0 */1019UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/10201021/** @stable ICU 2.0 */1022UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/10231024/** @stable ICU 2.0 */1025UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/10261027/** @stable ICU 2.0 */1028UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/10291030/**1031* Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".1032* @stable ICU 2.01033*/1034UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/10351036/** @stable ICU 2.0 */1037UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/10381039/** @stable ICU 2.0 */1040UBLOCK_NUMBER_FORMS =45, /*[2150]*/10411042/** @stable ICU 2.0 */1043UBLOCK_ARROWS =46, /*[2190]*/10441045/** @stable ICU 2.0 */1046UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/10471048/** @stable ICU 2.0 */1049UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/10501051/** @stable ICU 2.0 */1052UBLOCK_CONTROL_PICTURES =49, /*[2400]*/10531054/** @stable ICU 2.0 
*/1055UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/10561057/** @stable ICU 2.0 */1058UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/10591060/** @stable ICU 2.0 */1061UBLOCK_BOX_DRAWING =52, /*[2500]*/10621063/** @stable ICU 2.0 */1064UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/10651066/** @stable ICU 2.0 */1067UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/10681069/** @stable ICU 2.0 */1070UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/10711072/** @stable ICU 2.0 */1073UBLOCK_DINGBATS =56, /*[2700]*/10741075/** @stable ICU 2.0 */1076UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/10771078/** @stable ICU 2.0 */1079UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/10801081/** @stable ICU 2.0 */1082UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/10831084/** @stable ICU 2.0 */1085UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/10861087/** @stable ICU 2.0 */1088UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/10891090/** @stable ICU 2.0 */1091UBLOCK_HIRAGANA =62, /*[3040]*/10921093/** @stable ICU 2.0 */1094UBLOCK_KATAKANA =63, /*[30A0]*/10951096/** @stable ICU 2.0 */1097UBLOCK_BOPOMOFO =64, /*[3100]*/10981099/** @stable ICU 2.0 */1100UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/11011102/** @stable ICU 2.0 */1103UBLOCK_KANBUN =66, /*[3190]*/11041105/** @stable ICU 2.0 */1106UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/11071108/** @stable ICU 2.0 */1109UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/11101111/** @stable ICU 2.0 */1112UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/11131114/** @stable ICU 2.0 */1115UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/11161117/** @stable ICU 2.0 */1118UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/11191120/** @stable ICU 2.0 */1121UBLOCK_YI_SYLLABLES =72, /*[A000]*/11221123/** @stable ICU 2.0 */1124UBLOCK_YI_RADICALS =73, /*[A490]*/11251126/** @stable ICU 2.0 */1127UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/11281129/** @stable ICU 2.0 */1130UBLOCK_HIGH_SURROGATES =75, /*[D800]*/11311132/** @stable ICU 2.0 */1133UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, 
/*[DB80]*/11341135/** @stable ICU 2.0 */1136UBLOCK_LOW_SURROGATES =77, /*[DC00]*/11371138/**1139* Same as UBLOCK_PRIVATE_USE.1140* Until Unicode 3.1.1, the corresponding block name was "Private Use",1141* and multiple code point ranges had this block.1142* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and1143* adds separate blocks for the supplementary PUAs.1144*1145* @stable ICU 2.01146*/1147UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/1148/**1149* Same as UBLOCK_PRIVATE_USE_AREA.1150* Until Unicode 3.1.1, the corresponding block name was "Private Use",1151* and multiple code point ranges had this block.1152* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and1153* adds separate blocks for the supplementary PUAs.1154*1155* @stable ICU 2.01156*/1157UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,11581159/** @stable ICU 2.0 */1160UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/11611162/** @stable ICU 2.0 */1163UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/11641165/** @stable ICU 2.0 */1166UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/11671168/** @stable ICU 2.0 */1169UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/11701171/** @stable ICU 2.0 */1172UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/11731174/** @stable ICU 2.0 */1175UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/11761177/** @stable ICU 2.0 */1178UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/11791180/** @stable ICU 2.0 */1181UBLOCK_SPECIALS =86, /*[FFF0]*/11821183/** @stable ICU 2.0 */1184UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/11851186/* New blocks in Unicode 3.1 */11871188/** @stable ICU 2.0 */1189UBLOCK_OLD_ITALIC = 88, /*[10300]*/1190/** @stable ICU 2.0 */1191UBLOCK_GOTHIC = 89, /*[10330]*/1192/** @stable ICU 2.0 */1193UBLOCK_DESERET = 90, /*[10400]*/1194/** @stable ICU 2.0 */1195UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/1196/** @stable ICU 2.0 */1197UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/1198/** @stable ICU 2.0 
*/1199UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/1200/** @stable ICU 2.0 */1201UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/1202/** @stable ICU 2.0 */1203UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/1204/** @stable ICU 2.0 */1205UBLOCK_TAGS = 96, /*[E0000]*/12061207/* New blocks in Unicode 3.2 */12081209/** @stable ICU 3.0 */1210UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/1211/**1212* Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".1213* @stable ICU 2.21214*/1215UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,1216/** @stable ICU 2.2 */1217UBLOCK_TAGALOG = 98, /*[1700]*/1218/** @stable ICU 2.2 */1219UBLOCK_HANUNOO = 99, /*[1720]*/1220/** @stable ICU 2.2 */1221UBLOCK_BUHID = 100, /*[1740]*/1222/** @stable ICU 2.2 */1223UBLOCK_TAGBANWA = 101, /*[1760]*/1224/** @stable ICU 2.2 */1225UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/1226/** @stable ICU 2.2 */1227UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/1228/** @stable ICU 2.2 */1229UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/1230/** @stable ICU 2.2 */1231UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/1232/** @stable ICU 2.2 */1233UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/1234/** @stable ICU 2.2 */1235UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/1236/** @stable ICU 2.2 */1237UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/1238/** @stable ICU 2.2 */1239UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/1240/** @stable ICU 2.2 */1241UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/12421243/* New blocks in Unicode 4 */12441245/** @stable ICU 2.6 */1246UBLOCK_LIMBU = 111, /*[1900]*/1247/** @stable ICU 2.6 */1248UBLOCK_TAI_LE = 112, /*[1950]*/1249/** @stable ICU 2.6 */1250UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/1251/** @stable ICU 2.6 */1252UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/1253/** @stable ICU 2.6 */1254UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, 
/*[2B00]*/1255/** @stable ICU 2.6 */1256UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/1257/** @stable ICU 2.6 */1258UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/1259/** @stable ICU 2.6 */1260UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/1261/** @stable ICU 2.6 */1262UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/1263/** @stable ICU 2.6 */1264UBLOCK_UGARITIC = 120, /*[10380]*/1265/** @stable ICU 2.6 */1266UBLOCK_SHAVIAN = 121, /*[10450]*/1267/** @stable ICU 2.6 */1268UBLOCK_OSMANYA = 122, /*[10480]*/1269/** @stable ICU 2.6 */1270UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/1271/** @stable ICU 2.6 */1272UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/1273/** @stable ICU 2.6 */1274UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/12751276/* New blocks in Unicode 4.1 */12771278/** @stable ICU 3.4 */1279UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/1280/** @stable ICU 3.4 */1281UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/1282/** @stable ICU 3.4 */1283UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/1284/** @stable ICU 3.4 */1285UBLOCK_BUGINESE = 129, /*[1A00]*/1286/** @stable ICU 3.4 */1287UBLOCK_CJK_STROKES = 130, /*[31C0]*/1288/** @stable ICU 3.4 */1289UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/1290/** @stable ICU 3.4 */1291UBLOCK_COPTIC = 132, /*[2C80]*/1292/** @stable ICU 3.4 */1293UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/1294/** @stable ICU 3.4 */1295UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/1296/** @stable ICU 3.4 */1297UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/1298/** @stable ICU 3.4 */1299UBLOCK_GLAGOLITIC = 136, /*[2C00]*/1300/** @stable ICU 3.4 */1301UBLOCK_KHAROSHTHI = 137, /*[10A00]*/1302/** @stable ICU 3.4 */1303UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/1304/** @stable ICU 3.4 */1305UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/1306/** @stable ICU 3.4 */1307UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/1308/** @stable ICU 3.4 */1309UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/1310/** @stable ICU 3.4 */1311UBLOCK_SUPPLEMENTAL_PUNCTUATION 
= 142, /*[2E00]*/1312/** @stable ICU 3.4 */1313UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/1314/** @stable ICU 3.4 */1315UBLOCK_TIFINAGH = 144, /*[2D30]*/1316/** @stable ICU 3.4 */1317UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/13181319/* New blocks in Unicode 5.0 */13201321/** @stable ICU 3.6 */1322UBLOCK_NKO = 146, /*[07C0]*/1323/** @stable ICU 3.6 */1324UBLOCK_BALINESE = 147, /*[1B00]*/1325/** @stable ICU 3.6 */1326UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/1327/** @stable ICU 3.6 */1328UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/1329/** @stable ICU 3.6 */1330UBLOCK_PHAGS_PA = 150, /*[A840]*/1331/** @stable ICU 3.6 */1332UBLOCK_PHOENICIAN = 151, /*[10900]*/1333/** @stable ICU 3.6 */1334UBLOCK_CUNEIFORM = 152, /*[12000]*/1335/** @stable ICU 3.6 */1336UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/1337/** @stable ICU 3.6 */1338UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/13391340/* New blocks in Unicode 5.1 */13411342/** @stable ICU 4.0 */1343UBLOCK_SUNDANESE = 155, /*[1B80]*/1344/** @stable ICU 4.0 */1345UBLOCK_LEPCHA = 156, /*[1C00]*/1346/** @stable ICU 4.0 */1347UBLOCK_OL_CHIKI = 157, /*[1C50]*/1348/** @stable ICU 4.0 */1349UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/1350/** @stable ICU 4.0 */1351UBLOCK_VAI = 159, /*[A500]*/1352/** @stable ICU 4.0 */1353UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/1354/** @stable ICU 4.0 */1355UBLOCK_SAURASHTRA = 161, /*[A880]*/1356/** @stable ICU 4.0 */1357UBLOCK_KAYAH_LI = 162, /*[A900]*/1358/** @stable ICU 4.0 */1359UBLOCK_REJANG = 163, /*[A930]*/1360/** @stable ICU 4.0 */1361UBLOCK_CHAM = 164, /*[AA00]*/1362/** @stable ICU 4.0 */1363UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/1364/** @stable ICU 4.0 */1365UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/1366/** @stable ICU 4.0 */1367UBLOCK_LYCIAN = 167, /*[10280]*/1368/** @stable ICU 4.0 */1369UBLOCK_CARIAN = 168, /*[102A0]*/1370/** @stable ICU 4.0 */1371UBLOCK_LYDIAN = 169, /*[10920]*/1372/** @stable ICU 4.0 */1373UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/1374/** @stable ICU 4.0 
*/1375UBLOCK_DOMINO_TILES = 171, /*[1F030]*/13761377/* New blocks in Unicode 5.2 */13781379/** @stable ICU 4.4 */1380UBLOCK_SAMARITAN = 172, /*[0800]*/1381/** @stable ICU 4.4 */1382UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/1383/** @stable ICU 4.4 */1384UBLOCK_TAI_THAM = 174, /*[1A20]*/1385/** @stable ICU 4.4 */1386UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/1387/** @stable ICU 4.4 */1388UBLOCK_LISU = 176, /*[A4D0]*/1389/** @stable ICU 4.4 */1390UBLOCK_BAMUM = 177, /*[A6A0]*/1391/** @stable ICU 4.4 */1392UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/1393/** @stable ICU 4.4 */1394UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/1395/** @stable ICU 4.4 */1396UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/1397/** @stable ICU 4.4 */1398UBLOCK_JAVANESE = 181, /*[A980]*/1399/** @stable ICU 4.4 */1400UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/1401/** @stable ICU 4.4 */1402UBLOCK_TAI_VIET = 183, /*[AA80]*/1403/** @stable ICU 4.4 */1404UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/1405/** @stable ICU 4.4 */1406UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/1407/** @stable ICU 4.4 */1408UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/1409/** @stable ICU 4.4 */1410UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/1411/** @stable ICU 4.4 */1412UBLOCK_AVESTAN = 188, /*[10B00]*/1413/** @stable ICU 4.4 */1414UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/1415/** @stable ICU 4.4 */1416UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/1417/** @stable ICU 4.4 */1418UBLOCK_OLD_TURKIC = 191, /*[10C00]*/1419/** @stable ICU 4.4 */1420UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/1421/** @stable ICU 4.4 */1422UBLOCK_KAITHI = 193, /*[11080]*/1423/** @stable ICU 4.4 */1424UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/1425/** @stable ICU 4.4 */1426UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/1427/** @stable ICU 4.4 */1428UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/1429/** @stable ICU 4.4 */1430UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/14311432/* 
New blocks in Unicode 6.0 */14331434/** @stable ICU 4.6 */1435UBLOCK_MANDAIC = 198, /*[0840]*/1436/** @stable ICU 4.6 */1437UBLOCK_BATAK = 199, /*[1BC0]*/1438/** @stable ICU 4.6 */1439UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/1440/** @stable ICU 4.6 */1441UBLOCK_BRAHMI = 201, /*[11000]*/1442/** @stable ICU 4.6 */1443UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/1444/** @stable ICU 4.6 */1445UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/1446/** @stable ICU 4.6 */1447UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/1448/** @stable ICU 4.6 */1449UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/1450/** @stable ICU 4.6 */1451UBLOCK_EMOTICONS = 206, /*[1F600]*/1452/** @stable ICU 4.6 */1453UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/1454/** @stable ICU 4.6 */1455UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/1456/** @stable ICU 4.6 */1457UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/14581459/* New blocks in Unicode 6.1 */14601461/** @stable ICU 49 */1462UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/1463/** @stable ICU 49 */1464UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/1465/** @stable ICU 49 */1466UBLOCK_CHAKMA = 212, /*[11100]*/1467/** @stable ICU 49 */1468UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/1469/** @stable ICU 49 */1470UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/1471/** @stable ICU 49 */1472UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/1473/** @stable ICU 49 */1474UBLOCK_MIAO = 216, /*[16F00]*/1475/** @stable ICU 49 */1476UBLOCK_SHARADA = 217, /*[11180]*/1477/** @stable ICU 49 */1478UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/1479/** @stable ICU 49 */1480UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/1481/** @stable ICU 49 */1482UBLOCK_TAKRI = 220, /*[11680]*/14831484/* New blocks in Unicode 7.0 */14851486/** @stable ICU 54 */1487UBLOCK_BASSA_VAH = 221, /*[16AD0]*/1488/** @stable ICU 54 */1489UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/1490/** @stable ICU 54 */1491UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/1492/** @stable ICU 54 
*/1493UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/1494/** @stable ICU 54 */1495UBLOCK_DUPLOYAN = 225, /*[1BC00]*/1496/** @stable ICU 54 */1497UBLOCK_ELBASAN = 226, /*[10500]*/1498/** @stable ICU 54 */1499UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/1500/** @stable ICU 54 */1501UBLOCK_GRANTHA = 228, /*[11300]*/1502/** @stable ICU 54 */1503UBLOCK_KHOJKI = 229, /*[11200]*/1504/** @stable ICU 54 */1505UBLOCK_KHUDAWADI = 230, /*[112B0]*/1506/** @stable ICU 54 */1507UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/1508/** @stable ICU 54 */1509UBLOCK_LINEAR_A = 232, /*[10600]*/1510/** @stable ICU 54 */1511UBLOCK_MAHAJANI = 233, /*[11150]*/1512/** @stable ICU 54 */1513UBLOCK_MANICHAEAN = 234, /*[10AC0]*/1514/** @stable ICU 54 */1515UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/1516/** @stable ICU 54 */1517UBLOCK_MODI = 236, /*[11600]*/1518/** @stable ICU 54 */1519UBLOCK_MRO = 237, /*[16A40]*/1520/** @stable ICU 54 */1521UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/1522/** @stable ICU 54 */1523UBLOCK_NABATAEAN = 239, /*[10880]*/1524/** @stable ICU 54 */1525UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/1526/** @stable ICU 54 */1527UBLOCK_OLD_PERMIC = 241, /*[10350]*/1528/** @stable ICU 54 */1529UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/1530/** @stable ICU 54 */1531UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/1532/** @stable ICU 54 */1533UBLOCK_PALMYRENE = 244, /*[10860]*/1534/** @stable ICU 54 */1535UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/1536/** @stable ICU 54 */1537UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/1538/** @stable ICU 54 */1539UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/1540/** @stable ICU 54 */1541UBLOCK_SIDDHAM = 248, /*[11580]*/1542/** @stable ICU 54 */1543UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/1544/** @stable ICU 54 */1545UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/1546/** @stable ICU 54 */1547UBLOCK_TIRHUTA = 251, /*[11480]*/1548/** @stable ICU 54 */1549UBLOCK_WARANG_CITI = 252, /*[118A0]*/15501551/* New blocks in Unicode 8.0 */15521553/** 
@stable ICU 56 */1554UBLOCK_AHOM = 253, /*[11700]*/1555/** @stable ICU 56 */1556UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/1557/** @stable ICU 56 */1558UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/1559/** @stable ICU 56 */1560UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/1561/** @stable ICU 56 */1562UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/1563/** @stable ICU 56 */1564UBLOCK_HATRAN = 258, /*[108E0]*/1565/** @stable ICU 56 */1566UBLOCK_MULTANI = 259, /*[11280]*/1567/** @stable ICU 56 */1568UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/1569/** @stable ICU 56 */1570UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/1571/** @stable ICU 56 */1572UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/15731574/** @stable ICU 2.0 */1575UBLOCK_COUNT = 263,15761577/** @stable ICU 2.0 */1578UBLOCK_INVALID_CODE=-11579};15801581/** @stable ICU 2.0 */1582typedef enum UBlockCode UBlockCode;15831584/**1585* East Asian Width constants.1586*1587* @see UCHAR_EAST_ASIAN_WIDTH1588* @see u_getIntPropertyValue1589* @stable ICU 2.21590*/1591typedef enum UEastAsianWidth {1592/*1593* Note: UEastAsianWidth constants are parsed by preparseucd.py.1594* It matches lines like1595* U_EA_<Unicode East_Asian_Width value name>1596*/15971598U_EA_NEUTRAL, /*[N]*/1599U_EA_AMBIGUOUS, /*[A]*/1600U_EA_HALFWIDTH, /*[H]*/1601U_EA_FULLWIDTH, /*[F]*/1602U_EA_NARROW, /*[Na]*/1603U_EA_WIDE, /*[W]*/1604U_EA_COUNT1605} UEastAsianWidth;16061607/**1608* Selector constants for u_charName().1609* u_charName() returns the "modern" name of a1610* Unicode character; or the name that was defined in1611* Unicode version 1.0, before the Unicode standard merged1612* with ISO-10646; or an "extended" name that gives each1613* Unicode code point a unique name.1614*1615* @see u_charName1616* @stable ICU 2.01617*/1618typedef enum UCharNameChoice {1619/** Unicode character name (Name property). 
@stable ICU 2.0 */1620U_UNICODE_CHAR_NAME,1621#ifndef U_HIDE_DEPRECATED_API1622/**1623* The Unicode_1_Name property value which is of little practical value.1624* Beginning with ICU 49, ICU APIs return an empty string for this name choice.1625* @deprecated ICU 491626*/1627U_UNICODE_10_CHAR_NAME,1628#endif /* U_HIDE_DEPRECATED_API */1629/** Standard or synthetic character name. @stable ICU 2.0 */1630U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,1631/** Corrected name from NameAliases.txt. @stable ICU 4.4 */1632U_CHAR_NAME_ALIAS,1633/** @stable ICU 2.0 */1634U_CHAR_NAME_CHOICE_COUNT1635} UCharNameChoice;16361637/**1638* Selector constants for u_getPropertyName() and1639* u_getPropertyValueName(). These selectors are used to choose which1640* name is returned for a given property or value. All properties and1641* values have a long name. Most have a short name, but some do not.1642* Unicode allows for additional names, beyond the long and short1643* name, which would be indicated by U_LONG_PROPERTY_NAME + i, where1644* i=1, 2,...1645*1646* @see u_getPropertyName()1647* @see u_getPropertyValueName()1648* @stable ICU 2.41649*/1650typedef enum UPropertyNameChoice {1651U_SHORT_PROPERTY_NAME,1652U_LONG_PROPERTY_NAME,1653U_PROPERTY_NAME_CHOICE_COUNT1654} UPropertyNameChoice;16551656/**1657* Decomposition Type constants.1658*1659* @see UCHAR_DECOMPOSITION_TYPE1660* @stable ICU 2.21661*/1662typedef enum UDecompositionType {1663/*1664* Note: UDecompositionType constants are parsed by preparseucd.py.1665* It matches lines like1666* U_DT_<Unicode Decomposition_Type value name>1667*/16681669U_DT_NONE, /*[none]*/1670U_DT_CANONICAL, /*[can]*/1671U_DT_COMPAT, /*[com]*/1672U_DT_CIRCLE, /*[enc]*/1673U_DT_FINAL, /*[fin]*/1674U_DT_FONT, /*[font]*/1675U_DT_FRACTION, /*[fra]*/1676U_DT_INITIAL, /*[init]*/1677U_DT_ISOLATED, /*[iso]*/1678U_DT_MEDIAL, /*[med]*/1679U_DT_NARROW, /*[nar]*/1680U_DT_NOBREAK, /*[nb]*/1681U_DT_SMALL, /*[sml]*/1682U_DT_SQUARE, /*[sqr]*/1683U_DT_SUB, 
/*[sub]*/1684U_DT_SUPER, /*[sup]*/1685U_DT_VERTICAL, /*[vert]*/1686U_DT_WIDE, /*[wide]*/1687U_DT_COUNT /* 18 */1688} UDecompositionType;16891690/**1691* Joining Type constants.1692*1693* @see UCHAR_JOINING_TYPE1694* @stable ICU 2.21695*/1696typedef enum UJoiningType {1697/*1698* Note: UJoiningType constants are parsed by preparseucd.py.1699* It matches lines like1700* U_JT_<Unicode Joining_Type value name>1701*/17021703U_JT_NON_JOINING, /*[U]*/1704U_JT_JOIN_CAUSING, /*[C]*/1705U_JT_DUAL_JOINING, /*[D]*/1706U_JT_LEFT_JOINING, /*[L]*/1707U_JT_RIGHT_JOINING, /*[R]*/1708U_JT_TRANSPARENT, /*[T]*/1709U_JT_COUNT /* 6 */1710} UJoiningType;17111712/**1713* Joining Group constants.1714*1715* @see UCHAR_JOINING_GROUP1716* @stable ICU 2.21717*/1718typedef enum UJoiningGroup {1719/*1720* Note: UJoiningGroup constants are parsed by preparseucd.py.1721* It matches lines like1722* U_JG_<Unicode Joining_Group value name>1723*/17241725U_JG_NO_JOINING_GROUP,1726U_JG_AIN,1727U_JG_ALAPH,1728U_JG_ALEF,1729U_JG_BEH,1730U_JG_BETH,1731U_JG_DAL,1732U_JG_DALATH_RISH,1733U_JG_E,1734U_JG_FEH,1735U_JG_FINAL_SEMKATH,1736U_JG_GAF,1737U_JG_GAMAL,1738U_JG_HAH,1739U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */1740U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,1741U_JG_HE,1742U_JG_HEH,1743U_JG_HEH_GOAL,1744U_JG_HETH,1745U_JG_KAF,1746U_JG_KAPH,1747U_JG_KNOTTED_HEH,1748U_JG_LAM,1749U_JG_LAMADH,1750U_JG_MEEM,1751U_JG_MIM,1752U_JG_NOON,1753U_JG_NUN,1754U_JG_PE,1755U_JG_QAF,1756U_JG_QAPH,1757U_JG_REH,1758U_JG_REVERSED_PE,1759U_JG_SAD,1760U_JG_SADHE,1761U_JG_SEEN,1762U_JG_SEMKATH,1763U_JG_SHIN,1764U_JG_SWASH_KAF,1765U_JG_SYRIAC_WAW,1766U_JG_TAH,1767U_JG_TAW,1768U_JG_TEH_MARBUTA,1769U_JG_TETH,1770U_JG_WAW,1771U_JG_YEH,1772U_JG_YEH_BARREE,1773U_JG_YEH_WITH_TAIL,1774U_JG_YUDH,1775U_JG_YUDH_HE,1776U_JG_ZAIN,1777U_JG_FE, /**< @stable ICU 2.6 */1778U_JG_KHAPH, /**< @stable ICU 2.6 */1779U_JG_ZHAIN, /**< @stable ICU 2.6 */1780U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */1781U_JG_FARSI_YEH, /**< @stable ICU 
4.4 */1782U_JG_NYA, /**< @stable ICU 4.4 */1783U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */1784U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */1785U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */1786U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */1787U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */1788U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */1789U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */1790U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */1791U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */1792U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */1793U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */1794U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */1795U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */1796U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */1797U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */1798U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */1799U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */1800U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */1801U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */1802U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */1803U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */1804U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */1805U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */1806U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */1807U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */1808U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */1809U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */1810U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */1811U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */1812U_JG_COUNT1813} UJoiningGroup;18141815/**1816* Grapheme Cluster Break constants.1817*1818* @see UCHAR_GRAPHEME_CLUSTER_BREAK1819* @stable ICU 3.41820*/1821typedef enum UGraphemeClusterBreak {1822/*1823* Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.1824* It matches lines like1825* U_GCB_<Unicode Grapheme_Cluster_Break value name>1826*/18271828U_GCB_OTHER = 0, /*[XX]*/1829U_GCB_CONTROL = 1, /*[CN]*/1830U_GCB_CR = 2, /*[CR]*/1831U_GCB_EXTEND = 3, /*[EX]*/1832U_GCB_L = 4, /*[L]*/1833U_GCB_LF = 5, /*[LF]*/1834U_GCB_LV = 6, 
/*[LV]*/1835U_GCB_LVT = 7, /*[LVT]*/1836U_GCB_T = 8, /*[T]*/1837U_GCB_V = 9, /*[V]*/1838U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */1839U_GCB_PREPEND = 11, /*[PP]*/1840U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */1841U_GCB_COUNT = 131842} UGraphemeClusterBreak;18431844/**1845* Word Break constants.1846* (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)1847*1848* @see UCHAR_WORD_BREAK1849* @stable ICU 3.41850*/1851typedef enum UWordBreakValues {1852/*1853* Note: UWordBreakValues constants are parsed by preparseucd.py.1854* It matches lines like1855* U_WB_<Unicode Word_Break value name>1856*/18571858U_WB_OTHER = 0, /*[XX]*/1859U_WB_ALETTER = 1, /*[LE]*/1860U_WB_FORMAT = 2, /*[FO]*/1861U_WB_KATAKANA = 3, /*[KA]*/1862U_WB_MIDLETTER = 4, /*[ML]*/1863U_WB_MIDNUM = 5, /*[MN]*/1864U_WB_NUMERIC = 6, /*[NU]*/1865U_WB_EXTENDNUMLET = 7, /*[EX]*/1866U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */1867U_WB_EXTEND = 9, /*[Extend]*/1868U_WB_LF = 10, /*[LF]*/1869U_WB_MIDNUMLET =11, /*[MB]*/1870U_WB_NEWLINE =12, /*[NL]*/1871U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */1872U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */1873U_WB_SINGLE_QUOTE = 15, /*[SQ]*/1874U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/1875U_WB_COUNT = 171876} UWordBreakValues;18771878/**1879* Sentence Break constants.1880*1881* @see UCHAR_SENTENCE_BREAK1882* @stable ICU 3.41883*/1884typedef enum USentenceBreak {1885/*1886* Note: USentenceBreak constants are parsed by preparseucd.py.1887* It matches lines like1888* U_SB_<Unicode Sentence_Break value name>1889*/18901891U_SB_OTHER = 0, /*[XX]*/1892U_SB_ATERM = 1, /*[AT]*/1893U_SB_CLOSE = 2, /*[CL]*/1894U_SB_FORMAT = 3, /*[FO]*/1895U_SB_LOWER = 4, /*[LO]*/1896U_SB_NUMERIC = 5, /*[NU]*/1897U_SB_OLETTER = 6, /*[LE]*/1898U_SB_SEP = 7, /*[SE]*/1899U_SB_SP = 8, /*[SP]*/1900U_SB_STERM = 9, /*[ST]*/1901U_SB_UPPER = 10, 
/*[UP]*/1902U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */1903U_SB_EXTEND = 12, /*[EX]*/1904U_SB_LF = 13, /*[LF]*/1905U_SB_SCONTINUE = 14, /*[SC]*/1906U_SB_COUNT = 151907} USentenceBreak;19081909/**1910* Line Break constants.1911*1912* @see UCHAR_LINE_BREAK1913* @stable ICU 2.21914*/1915typedef enum ULineBreak {1916/*1917* Note: ULineBreak constants are parsed by preparseucd.py.1918* It matches lines like1919* U_LB_<Unicode Line_Break value name>1920*/19211922U_LB_UNKNOWN = 0, /*[XX]*/1923U_LB_AMBIGUOUS = 1, /*[AI]*/1924U_LB_ALPHABETIC = 2, /*[AL]*/1925U_LB_BREAK_BOTH = 3, /*[B2]*/1926U_LB_BREAK_AFTER = 4, /*[BA]*/1927U_LB_BREAK_BEFORE = 5, /*[BB]*/1928U_LB_MANDATORY_BREAK = 6, /*[BK]*/1929U_LB_CONTINGENT_BREAK = 7, /*[CB]*/1930U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/1931U_LB_COMBINING_MARK = 9, /*[CM]*/1932U_LB_CARRIAGE_RETURN = 10, /*[CR]*/1933U_LB_EXCLAMATION = 11, /*[EX]*/1934U_LB_GLUE = 12, /*[GL]*/1935U_LB_HYPHEN = 13, /*[HY]*/1936U_LB_IDEOGRAPHIC = 14, /*[ID]*/1937/** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */1938U_LB_INSEPARABLE = 15, /*[IN]*/1939U_LB_INSEPERABLE = U_LB_INSEPARABLE,1940U_LB_INFIX_NUMERIC = 16, /*[IS]*/1941U_LB_LINE_FEED = 17, /*[LF]*/1942U_LB_NONSTARTER = 18, /*[NS]*/1943U_LB_NUMERIC = 19, /*[NU]*/1944U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/1945U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/1946U_LB_PREFIX_NUMERIC = 22, /*[PR]*/1947U_LB_QUOTATION = 23, /*[QU]*/1948U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/1949U_LB_SURROGATE = 25, /*[SG]*/1950U_LB_SPACE = 26, /*[SP]*/1951U_LB_BREAK_SYMBOLS = 27, /*[SY]*/1952U_LB_ZWSPACE = 28, /*[ZW]*/1953U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */1954U_LB_WORD_JOINER = 30, /*[WJ]*/1955U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */1956U_LB_H3 = 32, /*[H3]*/1957U_LB_JL = 33, /*[JL]*/1958U_LB_JT = 34, /*[JT]*/1959U_LB_JV = 35, /*[JV]*/1960U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 
*/1961U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */1962U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */1963U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */1964U_LB_COUNT = 401965} ULineBreak;19661967/**1968* Numeric Type constants.1969*1970* @see UCHAR_NUMERIC_TYPE1971* @stable ICU 2.21972*/1973typedef enum UNumericType {1974/*1975* Note: UNumericType constants are parsed by preparseucd.py.1976* It matches lines like1977* U_NT_<Unicode Numeric_Type value name>1978*/19791980U_NT_NONE, /*[None]*/1981U_NT_DECIMAL, /*[de]*/1982U_NT_DIGIT, /*[di]*/1983U_NT_NUMERIC, /*[nu]*/1984U_NT_COUNT1985} UNumericType;19861987/**1988* Hangul Syllable Type constants.1989*1990* @see UCHAR_HANGUL_SYLLABLE_TYPE1991* @stable ICU 2.61992*/1993typedef enum UHangulSyllableType {1994/*1995* Note: UHangulSyllableType constants are parsed by preparseucd.py.1996* It matches lines like1997* U_HST_<Unicode Hangul_Syllable_Type value name>1998*/19992000U_HST_NOT_APPLICABLE, /*[NA]*/2001U_HST_LEADING_JAMO, /*[L]*/2002U_HST_VOWEL_JAMO, /*[V]*/2003U_HST_TRAILING_JAMO, /*[T]*/2004U_HST_LV_SYLLABLE, /*[LV]*/2005U_HST_LVT_SYLLABLE, /*[LVT]*/2006U_HST_COUNT2007} UHangulSyllableType;20082009/**2010* Check a binary Unicode property for a code point.2011*2012* Unicode, especially in version 3.2, defines many more properties than the2013* original set in UnicodeData.txt.2014*2015* The properties APIs are intended to reflect Unicode properties as defined2016* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).2017* For details about the properties see http://www.unicode.org/ucd/ .2018* For names of Unicode properties see the UCD file PropertyAliases.txt.2019*2020* Important: If ICU is built with UCD files from Unicode versions below 3.2,2021* then properties marked with "new in Unicode 3.2" are not or not fully available.2022*2023* @param c Code point to test.2024* @param which UProperty selector constant, 
identifies which binary property to check.2025* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.2026* @return TRUE or FALSE according to the binary Unicode property value for c.2027* Also FALSE if 'which' is out of bounds or if the Unicode version2028* does not have data for the property at all, or not for this code point.2029*2030* @see UProperty2031* @see u_getIntPropertyValue2032* @see u_getUnicodeVersion2033* @stable ICU 2.12034*/2035U_STABLE UBool U_EXPORT22036u_hasBinaryProperty(UChar32 c, UProperty which);20372038/**2039* Check if a code point has the Alphabetic Unicode property.2040* Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).2041* This is different from u_isalpha!2042* @param c Code point to test2043* @return true if the code point has the Alphabetic Unicode property, false otherwise2044*2045* @see UCHAR_ALPHABETIC2046* @see u_isalpha2047* @see u_hasBinaryProperty2048* @stable ICU 2.12049*/2050U_STABLE UBool U_EXPORT22051u_isUAlphabetic(UChar32 c);20522053/**2054* Check if a code point has the Lowercase Unicode property.2055* Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).2056* This is different from u_islower!2057* @param c Code point to test2058* @return true if the code point has the Lowercase Unicode property, false otherwise2059*2060* @see UCHAR_LOWERCASE2061* @see u_islower2062* @see u_hasBinaryProperty2063* @stable ICU 2.12064*/2065U_STABLE UBool U_EXPORT22066u_isULowercase(UChar32 c);20672068/**2069* Check if a code point has the Uppercase Unicode property.2070* Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).2071* This is different from u_isupper!2072* @param c Code point to test2073* @return true if the code point has the Uppercase Unicode property, false otherwise2074*2075* @see UCHAR_UPPERCASE2076* @see u_isupper2077* @see u_hasBinaryProperty2078* @stable ICU 2.12079*/2080U_STABLE UBool U_EXPORT22081u_isUUppercase(UChar32 c);20822083/**2084* Check if a code point has the White_Space Unicode property.2085* Same as 
u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).2086* This is different from both u_isspace and u_isWhitespace!2087*2088* Note: There are several ICU whitespace functions; please see the uchar.h2089* file documentation for a detailed comparison.2090*2091* @param c Code point to test2092* @return true if the code point has the White_Space Unicode property, false otherwise.2093*2094* @see UCHAR_WHITE_SPACE2095* @see u_isWhitespace2096* @see u_isspace2097* @see u_isJavaSpaceChar2098* @see u_hasBinaryProperty2099* @stable ICU 2.12100*/2101U_STABLE UBool U_EXPORT22102u_isUWhiteSpace(UChar32 c);21032104/**2105* Get the property value for an enumerated or integer Unicode property for a code point.2106* Also returns binary and mask property values.2107*2108* Unicode, especially in version 3.2, defines many more properties than the2109* original set in UnicodeData.txt.2110*2111* The properties APIs are intended to reflect Unicode properties as defined2112* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).2113* For details about the properties see http://www.unicode.org/ .2114* For names of Unicode properties see the UCD file PropertyAliases.txt.2115*2116* Sample usage:2117* UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);2118* UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);2119*2120* @param c Code point to test.2121* @param which UProperty selector constant, identifies which property to check.2122* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT2123* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT2124* or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.2125* @return Numeric value that is directly the property value or,2126* for enumerated properties, corresponds to the numeric value of the enumerated2127* constant of the respective property value enumeration type2128* (cast to enum type if necessary).2129* Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.2130* Returns a bit-mask for mask 
properties.2131* Returns 0 if 'which' is out of bounds or if the Unicode version2132* does not have data for the property at all, or not for this code point.2133*2134* @see UProperty2135* @see u_hasBinaryProperty2136* @see u_getIntPropertyMinValue2137* @see u_getIntPropertyMaxValue2138* @see u_getUnicodeVersion2139* @stable ICU 2.22140*/2141U_STABLE int32_t U_EXPORT22142u_getIntPropertyValue(UChar32 c, UProperty which);21432144/**2145* Get the minimum value for an enumerated/integer/binary Unicode property.2146* Can be used together with u_getIntPropertyMaxValue2147* to allocate arrays of UnicodeSet or similar.2148*2149* @param which UProperty selector constant, identifies which binary property to check.2150* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT2151* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.2152* @return Minimum value returned by u_getIntPropertyValue for a Unicode property.2153* 0 if the property selector is out of range.2154*2155* @see UProperty2156* @see u_hasBinaryProperty2157* @see u_getUnicodeVersion2158* @see u_getIntPropertyMaxValue2159* @see u_getIntPropertyValue2160* @stable ICU 2.22161*/2162U_STABLE int32_t U_EXPORT22163u_getIntPropertyMinValue(UProperty which);21642165/**2166* Get the maximum value for an enumerated/integer/binary Unicode property.2167* Can be used together with u_getIntPropertyMinValue2168* to allocate arrays of UnicodeSet or similar.2169*2170* Examples for min/max values (for Unicode 3.2):2171*2172* - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)2173* - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)2174* - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE)2175*2176* For undefined UProperty constant values, min/max values will be 0/-1.2177*2178* @param which UProperty selector constant, identifies which binary property to check.2179* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT2180* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.2181* @return Maximum value returned by u_getIntPropertyValue for a Unicode 
property.2182* <=0 if the property selector is out of range.2183*2184* @see UProperty2185* @see u_hasBinaryProperty2186* @see u_getUnicodeVersion2187* @see u_getIntPropertyMinValue2188* @see u_getIntPropertyValue2189* @stable ICU 2.22190*/2191U_STABLE int32_t U_EXPORT22192u_getIntPropertyMaxValue(UProperty which);21932194/**2195* Get the numeric value for a Unicode code point as defined in the2196* Unicode Character Database.2197*2198* A "double" return type is necessary because2199* some numeric values are fractions, negative, or too large for int32_t.2200*2201* For characters without any numeric values in the Unicode Character Database,2202* this function will return U_NO_NUMERIC_VALUE.2203* Note: This is different from the Unicode Standard which specifies NaN as the default value.2204* (NaN is not available on all platforms.)2205*2206* Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()2207* also supports negative values, large values, and fractions,2208* while Java's getNumericValue() returns values 10..35 for ASCII letters.2209*2210* @param c Code point to get the numeric value for.2211* @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.2212*2213* @see U_NO_NUMERIC_VALUE2214* @stable ICU 2.22215*/2216U_STABLE double U_EXPORT22217u_getNumericValue(UChar32 c);22182219/**2220* Special value that is returned by u_getNumericValue when2221* no numeric value is defined for a code point.2222*2223* @see u_getNumericValue2224* @stable ICU 2.22225*/2226#define U_NO_NUMERIC_VALUE ((double)-123456789.)22272228/**2229* Determines whether the specified code point has the general category "Ll"2230* (lowercase letter).2231*2232* Same as java.lang.Character.isLowerCase().2233*2234* This misses some characters that are also lowercase but2235* have a different general category value.2236* In order to include those, use UCHAR_LOWERCASE.2237*2238* In addition to being equivalent to a Java function, this also serves2239* as a C/POSIX 
migration function.2240* See the comments about C/POSIX character classification functions in the2241* documentation at the top of this header file.2242*2243* @param c the code point to be tested2244* @return TRUE if the code point is an Ll lowercase letter2245*2246* @see UCHAR_LOWERCASE2247* @see u_isupper2248* @see u_istitle2249* @stable ICU 2.02250*/2251U_STABLE UBool U_EXPORT22252u_islower(UChar32 c);22532254/**2255* Determines whether the specified code point has the general category "Lu"2256* (uppercase letter).2257*2258* Same as java.lang.Character.isUpperCase().2259*2260* This misses some characters that are also uppercase but2261* have a different general category value.2262* In order to include those, use UCHAR_UPPERCASE.2263*2264* In addition to being equivalent to a Java function, this also serves2265* as a C/POSIX migration function.2266* See the comments about C/POSIX character classification functions in the2267* documentation at the top of this header file.2268*2269* @param c the code point to be tested2270* @return TRUE if the code point is an Lu uppercase letter2271*2272* @see UCHAR_UPPERCASE2273* @see u_islower2274* @see u_istitle2275* @see u_tolower2276* @stable ICU 2.02277*/2278U_STABLE UBool U_EXPORT22279u_isupper(UChar32 c);22802281/**2282* Determines whether the specified code point is a titlecase letter.2283* True for general category "Lt" (titlecase letter).2284*2285* Same as java.lang.Character.isTitleCase().2286*2287* @param c the code point to be tested2288* @return TRUE if the code point is an Lt titlecase letter2289*2290* @see u_isupper2291* @see u_islower2292* @see u_totitle2293* @stable ICU 2.02294*/2295U_STABLE UBool U_EXPORT22296u_istitle(UChar32 c);22972298/**2299* Determines whether the specified code point is a digit character according to Java.2300* True for characters with general category "Nd" (decimal digit numbers).2301* Beginning with Unicode 4, this is the same as2302* testing for the Numeric_Type of Decimal.2303*2304* 
Same as java.lang.Character.isDigit().2305*2306* In addition to being equivalent to a Java function, this also serves2307* as a C/POSIX migration function.2308* See the comments about C/POSIX character classification functions in the2309* documentation at the top of this header file.2310*2311* @param c the code point to be tested2312* @return TRUE if the code point is a digit character according to Character.isDigit()2313*2314* @stable ICU 2.02315*/2316U_STABLE UBool U_EXPORT22317u_isdigit(UChar32 c);23182319/**2320* Determines whether the specified code point is a letter character.2321* True for general categories "L" (letters).2322*2323* Same as java.lang.Character.isLetter().2324*2325* In addition to being equivalent to a Java function, this also serves2326* as a C/POSIX migration function.2327* See the comments about C/POSIX character classification functions in the2328* documentation at the top of this header file.2329*2330* @param c the code point to be tested2331* @return TRUE if the code point is a letter character2332*2333* @see u_isdigit2334* @see u_isalnum2335* @stable ICU 2.02336*/2337U_STABLE UBool U_EXPORT22338u_isalpha(UChar32 c);23392340/**2341* Determines whether the specified code point is an alphanumeric character2342* (letter or digit) according to Java.2343* True for characters with general categories2344* "L" (letters) and "Nd" (decimal digit numbers).2345*2346* Same as java.lang.Character.isLetterOrDigit().2347*2348* In addition to being equivalent to a Java function, this also serves2349* as a C/POSIX migration function.2350* See the comments about C/POSIX character classification functions in the2351* documentation at the top of this header file.2352*2353* @param c the code point to be tested2354* @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()2355*2356* @stable ICU 2.02357*/2358U_STABLE UBool U_EXPORT22359u_isalnum(UChar32 c);23602361/**2362* Determines whether the specified code point 
is a hexadecimal digit.2363* This is equivalent to u_digit(c, 16)>=0.2364* True for characters with general category "Nd" (decimal digit numbers)2365* as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.2366* (That is, for letters with code points2367* 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)2368*2369* In order to narrow the definition of hexadecimal digits to only ASCII2370* characters, use (c<=0x7f && u_isxdigit(c)).2371*2372* This is a C/POSIX migration function.2373* See the comments about C/POSIX character classification functions in the2374* documentation at the top of this header file.2375*2376* @param c the code point to be tested2377* @return TRUE if the code point is a hexadecimal digit2378*2379* @stable ICU 2.62380*/2381U_STABLE UBool U_EXPORT22382u_isxdigit(UChar32 c);23832384/**2385* Determines whether the specified code point is a punctuation character.2386* True for characters with general categories "P" (punctuation).2387*2388* This is a C/POSIX migration function.2389* See the comments about C/POSIX character classification functions in the2390* documentation at the top of this header file.2391*2392* @param c the code point to be tested2393* @return TRUE if the code point is a punctuation character2394*2395* @stable ICU 2.62396*/2397U_STABLE UBool U_EXPORT22398u_ispunct(UChar32 c);23992400/**2401* Determines whether the specified code point is a "graphic" character2402* (printable, excluding spaces).2403* TRUE for all characters except those with general categories2404* "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),2405* "Cn" (unassigned), and "Z" (separators).2406*2407* This is a C/POSIX migration function.2408* See the comments about C/POSIX character classification functions in the2409* documentation at the top of this header file.2410*2411* @param c the code point to be tested2412* @return TRUE if the code point is a "graphic" character2413*2414* @stable ICU 2.62415*/2416U_STABLE UBool 
U_EXPORT22417u_isgraph(UChar32 c);24182419/**2420* Determines whether the specified code point is a "blank" or "horizontal space",2421* a character that visibly separates words on a line.2422* The following are equivalent definitions:2423*2424* TRUE for Unicode White_Space characters except for "vertical space controls"2425* where "vertical space controls" are the following characters:2426* U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)2427*2428* same as2429*2430* TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)2431* except Zero Width Space (ZWSP, U+200B).2432*2433* Note: There are several ICU whitespace functions; please see the uchar.h2434* file documentation for a detailed comparison.2435*2436* This is a C/POSIX migration function.2437* See the comments about C/POSIX character classification functions in the2438* documentation at the top of this header file.2439*2440* @param c the code point to be tested2441* @return TRUE if the code point is a "blank"2442*2443* @stable ICU 2.62444*/2445U_STABLE UBool U_EXPORT22446u_isblank(UChar32 c);24472448/**2449* Determines whether the specified code point is "defined",2450* which usually means that it is assigned a character.2451* True for general categories other than "Cn" (other, not assigned),2452* i.e., true for all code points mentioned in UnicodeData.txt.2453*2454* Note that non-character code points (e.g., U+FDD0) are not "defined"2455* (they are Cn), but surrogate code points are "defined" (Cs).2456*2457* Same as java.lang.Character.isDefined().2458*2459* @param c the code point to be tested2460* @return TRUE if the code point is assigned a character2461*2462* @see u_isdigit2463* @see u_isalpha2464* @see u_isalnum2465* @see u_isupper2466* @see u_islower2467* @see u_istitle2468* @stable ICU 2.02469*/2470U_STABLE UBool U_EXPORT22471u_isdefined(UChar32 c);24722473/**2474* Determines if the specified character is a space character or not.2475*2476* 
Note: There are several ICU whitespace functions; please see the uchar.h2477* file documentation for a detailed comparison.2478*2479* This is a C/POSIX migration function.2480* See the comments about C/POSIX character classification functions in the2481* documentation at the top of this header file.2482*2483* @param c the character to be tested2484* @return true if the character is a space character; false otherwise.2485*2486* @see u_isJavaSpaceChar2487* @see u_isWhitespace2488* @see u_isUWhiteSpace2489* @stable ICU 2.02490*/2491U_STABLE UBool U_EXPORT22492u_isspace(UChar32 c);24932494/**2495* Determine if the specified code point is a space character according to Java.2496* True for characters with general categories "Z" (separators),2497* which does not include control codes (e.g., TAB or Line Feed).2498*2499* Same as java.lang.Character.isSpaceChar().2500*2501* Note: There are several ICU whitespace functions; please see the uchar.h2502* file documentation for a detailed comparison.2503*2504* @param c the code point to be tested2505* @return TRUE if the code point is a space character according to Character.isSpaceChar()2506*2507* @see u_isspace2508* @see u_isWhitespace2509* @see u_isUWhiteSpace2510* @stable ICU 2.62511*/2512U_STABLE UBool U_EXPORT22513u_isJavaSpaceChar(UChar32 c);25142515/**2516* Determines if the specified code point is a whitespace character according to Java/ICU.2517* A character is considered to be a Java whitespace character if and only2518* if it satisfies one of the following criteria:2519*2520* - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not2521* also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).2522* - It is U+0009 HORIZONTAL TABULATION.2523* - It is U+000A LINE FEED.2524* - It is U+000B VERTICAL TABULATION.2525* - It is U+000C FORM FEED.2526* - It is U+000D CARRIAGE RETURN.2527* - It is U+001C FILE SEPARATOR.2528* - It is U+001D GROUP SEPARATOR.2529* - It is 
U+001E RECORD SEPARATOR.2530* - It is U+001F UNIT SEPARATOR.2531*2532* This API tries to sync with the semantics of Java's2533* java.lang.Character.isWhitespace(), but it may not return2534* the exact same results because of the Unicode version2535* difference.2536*2537* Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)2538* to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.2539* See http://www.unicode.org/versions/Unicode4.0.1/2540*2541* Note: There are several ICU whitespace functions; please see the uchar.h2542* file documentation for a detailed comparison.2543*2544* @param c the code point to be tested2545* @return TRUE if the code point is a whitespace character according to Java/ICU2546*2547* @see u_isspace2548* @see u_isJavaSpaceChar2549* @see u_isUWhiteSpace2550* @stable ICU 2.02551*/2552U_STABLE UBool U_EXPORT22553u_isWhitespace(UChar32 c);25542555/**2556* Determines whether the specified code point is a control character2557* (as defined by this function).2558* A control character is one of the following:2559* - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)2560* - U_CONTROL_CHAR (Cc)2561* - U_FORMAT_CHAR (Cf)2562* - U_LINE_SEPARATOR (Zl)2563* - U_PARAGRAPH_SEPARATOR (Zp)2564*2565* This is a C/POSIX migration function.2566* See the comments about C/POSIX character classification functions in the2567* documentation at the top of this header file.2568*2569* @param c the code point to be tested2570* @return TRUE if the code point is a control character2571*2572* @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT2573* @see u_isprint2574* @stable ICU 2.02575*/2576U_STABLE UBool U_EXPORT22577u_iscntrl(UChar32 c);25782579/**2580* Determines whether the specified code point is an ISO control code.2581* True for U+0000..U+001f and U+007f..U+009f (general category "Cc").2582*2583* Same as java.lang.Character.isISOControl().2584*2585* @param c the code point to be tested2586* @return TRUE if the code point 
is an ISO control code2587*2588* @see u_iscntrl2589* @stable ICU 2.62590*/2591U_STABLE UBool U_EXPORT22592u_isISOControl(UChar32 c);25932594/**2595* Determines whether the specified code point is a printable character.2596* True for general categories <em>other</em> than "C" (controls).2597*2598* This is a C/POSIX migration function.2599* See the comments about C/POSIX character classification functions in the2600* documentation at the top of this header file.2601*2602* @param c the code point to be tested2603* @return TRUE if the code point is a printable character2604*2605* @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT2606* @see u_iscntrl2607* @stable ICU 2.02608*/2609U_STABLE UBool U_EXPORT22610u_isprint(UChar32 c);26112612/**2613* Determines whether the specified code point is a base character.2614* True for general categories "L" (letters), "N" (numbers),2615* "Mc" (spacing combining marks), and "Me" (enclosing marks).2616*2617* Note that this is different from the Unicode definition in2618* chapter 3.5, conformance clause D13,2619* which defines base characters to be all characters (not Cn)2620* that do not graphically combine with preceding characters (M)2621* and that are neither control (Cc) nor format (Cf) characters.2622*2623* @param c the code point to be tested2624* @return TRUE if the code point is a base character according to this function2625*2626* @see u_isalpha2627* @see u_isdigit2628* @stable ICU 2.02629*/2630U_STABLE UBool U_EXPORT22631u_isbase(UChar32 c);26322633/**2634* Returns the bidirectional category value for the code point,2635* which is used in the Unicode bidirectional algorithm2636* (UAX #9 http://www.unicode.org/reports/tr9/).2637* Note that some <em>unassigned</em> code points have bidi values2638* of R or AL because they are in blocks that are reserved2639* for Right-To-Left scripts.2640*2641* Same as java.lang.Character.getDirectionality()2642*2643* @param c the code point to be tested2644* @return the bidirectional category 
(UCharDirection) value2645*2646* @see UCharDirection2647* @stable ICU 2.02648*/2649U_STABLE UCharDirection U_EXPORT22650u_charDirection(UChar32 c);26512652/**2653* Determines whether the code point has the Bidi_Mirrored property.2654* This property is set for characters that are commonly used in2655* Right-To-Left contexts and need to be displayed with a "mirrored"2656* glyph.2657*2658* Same as java.lang.Character.isMirrored().2659* Same as UCHAR_BIDI_MIRRORED2660*2661* @param c the code point to be tested2662* @return TRUE if the character has the Bidi_Mirrored property2663*2664* @see UCHAR_BIDI_MIRRORED2665* @stable ICU 2.02666*/2667U_STABLE UBool U_EXPORT22668u_isMirrored(UChar32 c);26692670/**2671* Maps the specified character to a "mirror-image" character.2672* For characters with the Bidi_Mirrored property, implementations2673* sometimes need a "poor man's" mapping to another Unicode2674* character (code point) such that the default glyph may serve2675* as the mirror-image of the default glyph of the specified2676* character. 
This is useful for text conversion to and from2677* codepages with visual order, and for displays without glyph2678* selection capabilities.2679*2680* @param c the code point to be mapped2681* @return another Unicode code point that may serve as a mirror-image2682* substitute, or c itself if there is no such mapping or c2683* does not have the Bidi_Mirrored property2684*2685* @see UCHAR_BIDI_MIRRORED2686* @see u_isMirrored2687* @stable ICU 2.02688*/2689U_STABLE UChar32 U_EXPORT22690u_charMirror(UChar32 c);26912692/**2693* Maps the specified character to its paired bracket character.2694* For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().2695* Otherwise c itself is returned.2696* See http://www.unicode.org/reports/tr9/2697*2698* @param c the code point to be mapped2699* @return the paired bracket code point,2700* or c itself if there is no such mapping2701* (Bidi_Paired_Bracket_Type=None)2702*2703* @see UCHAR_BIDI_PAIRED_BRACKET2704* @see UCHAR_BIDI_PAIRED_BRACKET_TYPE2705* @see u_charMirror2706* @stable ICU 522707*/2708U_STABLE UChar32 U_EXPORT22709u_getBidiPairedBracket(UChar32 c);27102711/**2712* Returns the general category value for the code point.2713*2714* Same as java.lang.Character.getType().2715*2716* @param c the code point to be tested2717* @return the general category (UCharCategory) value2718*2719* @see UCharCategory2720* @stable ICU 2.02721*/2722U_STABLE int8_t U_EXPORT22723u_charType(UChar32 c);27242725/**2726* Get a single-bit bit set for the general category of a character.2727* This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.2728* Same as U_MASK(u_charType(c)).2729*2730* @param c the code point to be tested2731* @return a single-bit mask corresponding to the general category (UCharCategory) value2732*2733* @see u_charType2734* @see UCharCategory2735* @see U_GC_CN_MASK2736* @stable ICU 2.12737*/2738#define U_GET_GC_MASK(c) U_MASK(u_charType(c))27392740/**2741* Callback from u_enumCharTypes(), is called 
for each contiguous range2742* of code points c (where start<=c<limit)2743* with the same Unicode general category ("character type").2744*2745* The callback function can stop the enumeration by returning FALSE.2746*2747* @param context an opaque pointer, as passed into u_enumCharTypes()2748* @param start the first code point in a contiguous range with value2749* @param limit one past the last code point in a contiguous range with value2750* @param type the general category for all code points in [start..limit[2751* @return FALSE to stop the enumeration2752*2753* @stable ICU 2.12754* @see UCharCategory2755* @see u_enumCharTypes2756*/2757typedef UBool U_CALLCONV2758UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);27592760/**2761* Enumerate efficiently all code points with their Unicode general categories.2762*2763* This is useful for building data structures (e.g., UnicodeSet's),2764* for enumerating all assigned code points (type!=U_UNASSIGNED), etc.2765*2766* For each contiguous range of code points with a given general category ("character type"),2767* the UCharEnumTypeRange function is called.2768* Adjacent ranges have different types.2769* The Unicode Standard guarantees that the numeric value of the type is 0..31.2770*2771* @param enumRange a pointer to a function that is called for each contiguous range2772* of code points with the same general category2773* @param context an opaque pointer that is passed on to the callback function2774*2775* @stable ICU 2.12776* @see UCharCategory2777* @see UCharEnumTypeRange2778*/2779U_STABLE void U_EXPORT22780u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);27812782#if !UCONFIG_NO_NORMALIZATION27832784/**2785* Returns the combining class of the code point as specified in UnicodeData.txt.2786*2787* @param c the code point of the character2788* @return the combining class of the character2789* @stable ICU 2.02790*/2791U_STABLE uint8_t 
U_EXPORT22792u_getCombiningClass(UChar32 c);27932794#endif27952796/**2797* Returns the decimal digit value of a decimal digit character.2798* Such characters have the general category "Nd" (decimal digit numbers)2799* and a Numeric_Type of Decimal.2800*2801* Unlike ICU releases before 2.6, no digit values are returned for any2802* Han characters because Han number characters are often used with a special2803* Chinese-style number format (with characters for powers of 10 in between)2804* instead of in decimal-positional notation.2805* Unicode 4 explicitly assigns Han number characters the Numeric_Type2806* Numeric instead of Decimal.2807* See Jitterbug 1483 for more details.2808*2809* Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()2810* for complete numeric Unicode properties.2811*2812* @param c the code point for which to get the decimal digit value2813* @return the decimal digit value of c,2814* or -1 if c is not a decimal digit character2815*2816* @see u_getNumericValue2817* @stable ICU 2.02818*/2819U_STABLE int32_t U_EXPORT22820u_charDigitValue(UChar32 c);28212822/**2823* Returns the Unicode allocation block that contains the character.2824*2825* @param c the code point to be tested2826* @return the block value (UBlockCode) for c2827*2828* @see UBlockCode2829* @stable ICU 2.02830*/2831U_STABLE UBlockCode U_EXPORT22832ublock_getCode(UChar32 c);28332834/**2835* Retrieve the name of a Unicode character.2836* Depending on <code>nameChoice</code>, the character name written2837* into the buffer is the "modern" name or the name that was defined2838* in Unicode version 1.0.2839* The name contains only "invariant" characters2840* like A-Z, 0-9, space, and '-'.2841* Unicode 1.0 names are only retrieved if they are different from the modern2842* names and if the data file contains the data for them. 
gennames may or may2843* not be called with a command line option to include 1.0 names in unames.dat.2844*2845* @param code The character (code point) for which to get the name.2846* It must be <code>0<=code<=0x10ffff</code>.2847* @param nameChoice Selector for which name to get.2848* @param buffer Destination address for copying the name.2849* The name will always be zero-terminated.2850* If there is no name, then the buffer will be set to the empty string.2851* @param bufferLength <code>==sizeof(buffer)</code>2852* @param pErrorCode Pointer to a UErrorCode variable;2853* check for <code>U_SUCCESS()</code> after <code>u_charName()</code>2854* returns.2855* @return The length of the name, or 0 if there is no name for this character.2856* If the bufferLength is less than or equal to the length, then the buffer2857* contains the truncated name and the returned length indicates the full2858* length of the name.2859* The length does not include the zero-termination.2860*2861* @see UCharNameChoice2862* @see u_charFromName2863* @see u_enumCharNames2864* @stable ICU 2.02865*/2866U_STABLE int32_t U_EXPORT22867u_charName(UChar32 code, UCharNameChoice nameChoice,2868char *buffer, int32_t bufferLength,2869UErrorCode *pErrorCode);28702871#ifndef U_HIDE_DEPRECATED_API2872/**2873* Returns an empty string.2874* Used to return the ISO 10646 comment for a character.2875* The Unicode ISO_Comment property is deprecated and has no values.2876*2877* @param c The character (code point) for which to get the ISO comment.2878* It must be <code>0<=c<=0x10ffff</code>.2879* @param dest Destination address for copying the comment.2880* The comment will be zero-terminated if possible.2881* If there is no comment, then the buffer will be set to the empty string.2882* @param destCapacity <code>==sizeof(dest)</code>2883* @param pErrorCode Pointer to a UErrorCode variable;2884* check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>2885* returns.2886* @return 02887*2888* @deprecated 
ICU 492889*/2890U_DEPRECATED int32_t U_EXPORT22891u_getISOComment(UChar32 c,2892char *dest, int32_t destCapacity,2893UErrorCode *pErrorCode);2894#endif /* U_HIDE_DEPRECATED_API */28952896/**2897* Find a Unicode character by its name and return its code point value.2898* The name is matched exactly and completely.2899* If the name does not correspond to a code point, <i>pErrorCode</i>2900* is set to <code>U_INVALID_CHAR_FOUND</code>.2901* A Unicode 1.0 name is matched only if it differs from the modern name.2902* Unicode names are all uppercase. Extended names are lowercase followed2903* by an uppercase hexadecimal number, and within angle brackets.2904*2905* @param nameChoice Selector for which name to match.2906* @param name The name to match.2907* @param pErrorCode Pointer to a UErrorCode variable2908* @return The Unicode value of the code point with the given name,2909* or an undefined value if there is no such code point.2910*2911* @see UCharNameChoice2912* @see u_charName2913* @see u_enumCharNames2914* @stable ICU 1.72915*/2916U_STABLE UChar32 U_EXPORT22917u_charFromName(UCharNameChoice nameChoice,2918const char *name,2919UErrorCode *pErrorCode);29202921/**2922* Type of a callback function for u_enumCharNames() that gets called2923* for each Unicode character with the code point value and2924* the character name.2925* If such a function returns FALSE, then the enumeration is stopped.2926*2927* @param context The context pointer that was passed to u_enumCharNames().2928* @param code The Unicode code point for the character with this name.2929* @param nameChoice Selector for which kind of names is enumerated.2930* @param name The character's name, zero-terminated.2931* @param length The length of the name.2932* @return TRUE if the enumeration should continue, FALSE to stop it.2933*2934* @see UCharNameChoice2935* @see u_enumCharNames2936* @stable ICU 1.72937*/2938typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,2939UChar32 code,2940UCharNameChoice 
nameChoice,2941const char *name,2942int32_t length);29432944/**2945* Enumerate all assigned Unicode characters between the start and limit2946* code points (start inclusive, limit exclusive) and call a function2947* for each, passing the code point value and the character name.2948* For Unicode 1.0 names, only those are enumerated that differ from the2949* modern names.2950*2951* @param start The first code point in the enumeration range.2952* @param limit One more than the last code point in the enumeration range2953* (the first one after the range).2954* @param fn The function that is to be called for each character name.2955* @param context An arbitrary pointer that is passed to the function.2956* @param nameChoice Selector for which kind of names to enumerate.2957* @param pErrorCode Pointer to a UErrorCode variable2958*2959* @see UCharNameChoice2960* @see UEnumCharNamesFn2961* @see u_charName2962* @see u_charFromName2963* @stable ICU 1.72964*/2965U_STABLE void U_EXPORT22966u_enumCharNames(UChar32 start, UChar32 limit,2967UEnumCharNamesFn *fn,2968void *context,2969UCharNameChoice nameChoice,2970UErrorCode *pErrorCode);29712972/**2973* Return the Unicode name for a given property, as given in the2974* Unicode database file PropertyAliases.txt.2975*2976* In addition, this function maps the property2977* UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /2978* "General_Category_Mask". These names are not in2979* PropertyAliases.txt.2980*2981* @param property UProperty selector other than UCHAR_INVALID_CODE.2982* If out of range, NULL is returned.2983*2984* @param nameChoice selector for which name to get. If out of range,2985* NULL is returned. All properties have a long name. Most2986* have a short name, but some do not. Unicode allows for2987* additional names; if present these will be returned by2988* U_LONG_PROPERTY_NAME + i, where i=1, 2,...2989*2990* @return a pointer to the name, or NULL if either the2991* property or the nameChoice is out of range. 
If a given2992* nameChoice returns NULL, then all larger values of2993* nameChoice will return NULL, with one exception: if NULL is2994* returned for U_SHORT_PROPERTY_NAME, then2995* U_LONG_PROPERTY_NAME (and higher) may still return a2996* non-NULL value. The returned pointer is valid until2997* u_cleanup() is called.2998*2999* @see UProperty3000* @see UPropertyNameChoice3001* @stable ICU 2.43002*/3003U_STABLE const char* U_EXPORT23004u_getPropertyName(UProperty property,3005UPropertyNameChoice nameChoice);30063007/**3008* Return the UProperty enum for a given property name, as specified3009* in the Unicode database file PropertyAliases.txt. Short, long, and3010* any other variants are recognized.3011*3012* In addition, this function maps the synthetic names "gcm" /3013* "General_Category_Mask" to the property3014* UCHAR_GENERAL_CATEGORY_MASK. These names are not in3015* PropertyAliases.txt.3016*3017* @param alias the property name to be matched. The name is compared3018* using "loose matching" as described in PropertyAliases.txt.3019*3020* @return a UProperty enum, or UCHAR_INVALID_CODE if the given name3021* does not match any property.3022*3023* @see UProperty3024* @stable ICU 2.43025*/3026U_STABLE UProperty U_EXPORT23027u_getPropertyEnum(const char* alias);30283029/**3030* Return the Unicode name for a given property value, as given in the3031* Unicode database file PropertyValueAliases.txt.3032*3033* Note: Some of the names in PropertyValueAliases.txt can only be3034* retrieved using UCHAR_GENERAL_CATEGORY_MASK, not3035* UCHAR_GENERAL_CATEGORY. 
These include: "C" / "Other", "L" /3036* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"3037* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".3038*3039* @param property UProperty selector constant.3040* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT3041* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT3042* or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.3043* If out of range, NULL is returned.3044*3045* @param value selector for a value for the given property. If out3046* of range, NULL is returned. In general, valid values range3047* from 0 up to some maximum. There are a few exceptions:3048* (1.) UCHAR_BLOCK values begin at the non-zero value3049* UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS3050* values are not contiguous and range from 0..240. (3.)3051* UCHAR_GENERAL_CATEGORY_MASK values are not values of3052* UCharCategory, but rather mask values produced by3053* U_GET_GC_MASK(). This allows grouped categories such as3054* [:L:] to be represented. Mask values range3055* non-contiguously from 1..U_GC_P_MASK.3056*3057* @param nameChoice selector for which name to get. If out of range,3058* NULL is returned. All values have a long name. Most have3059* a short name, but some do not. Unicode allows for3060* additional names; if present these will be returned by3061* U_LONG_PROPERTY_NAME + i, where i=1, 2,...30623063* @return a pointer to the name, or NULL if either the3064* property or the nameChoice is out of range. If a given3065* nameChoice returns NULL, then all larger values of3066* nameChoice will return NULL, with one exception: if NULL is3067* returned for U_SHORT_PROPERTY_NAME, then3068* U_LONG_PROPERTY_NAME (and higher) may still return a3069* non-NULL value. 
The returned pointer is valid until3070* u_cleanup() is called.3071*3072* @see UProperty3073* @see UPropertyNameChoice3074* @stable ICU 2.43075*/3076U_STABLE const char* U_EXPORT23077u_getPropertyValueName(UProperty property,3078int32_t value,3079UPropertyNameChoice nameChoice);30803081/**3082* Return the property value integer for a given value name, as3083* specified in the Unicode database file PropertyValueAliases.txt.3084* Short, long, and any other variants are recognized.3085*3086* Note: Some of the names in PropertyValueAliases.txt will only be3087* recognized with UCHAR_GENERAL_CATEGORY_MASK, not3088* UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /3089* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"3090* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".3091*3092* @param property UProperty selector constant.3093* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT3094* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT3095* or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.3096* If out of range, UCHAR_INVALID_CODE is returned.3097*3098* @param alias the value name to be matched. The name is compared3099* using "loose matching" as described in3100* PropertyValueAliases.txt.3101*3102* @return a value integer or UCHAR_INVALID_CODE if the given name3103* does not match any value of the given property, or if the3104* property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values3105* are not values of UCharCategory, but rather mask values3106* produced by U_GET_GC_MASK(). 
This allows grouped3107* categories such as [:L:] to be represented.3108*3109* @see UProperty3110* @stable ICU 2.43111*/3112U_STABLE int32_t U_EXPORT23113u_getPropertyValueEnum(UProperty property,3114const char* alias);31153116/**3117* Determines if the specified character is permissible as the3118* first character in an identifier according to Unicode3119* (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).3120* True for characters with general categories "L" (letters) and "Nl" (letter numbers).3121*3122* Same as java.lang.Character.isUnicodeIdentifierStart().3123* Same as UCHAR_ID_START3124*3125* @param c the code point to be tested3126* @return TRUE if the code point may start an identifier3127*3128* @see UCHAR_ID_START3129* @see u_isalpha3130* @see u_isIDPart3131* @stable ICU 2.03132*/3133U_STABLE UBool U_EXPORT23134u_isIDStart(UChar32 c);31353136/**3137* Determines if the specified character is permissible3138* in an identifier according to Java.3139* True for characters with general categories "L" (letters),3140* "Nl" (letter numbers), "Nd" (decimal digits),3141* "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and3142* u_isIDIgnorable(c).3143*3144* Same as java.lang.Character.isUnicodeIdentifierPart().3145* Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)3146* except that Unicode recommends to ignore Cf which is less than3147* u_isIDIgnorable(c).3148*3149* @param c the code point to be tested3150* @return TRUE if the code point may occur in an identifier according to Java3151*3152* @see UCHAR_ID_CONTINUE3153* @see u_isIDStart3154* @see u_isIDIgnorable3155* @stable ICU 2.03156*/3157U_STABLE UBool U_EXPORT23158u_isIDPart(UChar32 c);31593160/**3161* Determines if the specified character should be regarded3162* as an ignorable character in an identifier,3163* according to Java.3164* True for characters with general category "Cf" (format controls) as well as3165* non-whitespace ISO controls3166* (U+0000..U+0008, 
U+000E..U+001B, U+007F..U+009F).3167*3168* Same as java.lang.Character.isIdentifierIgnorable().3169*3170* Note that Unicode just recommends to ignore Cf (format controls).3171*3172* @param c the code point to be tested3173* @return TRUE if the code point is ignorable in identifiers according to Java3174*3175* @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT3176* @see u_isIDStart3177* @see u_isIDPart3178* @stable ICU 2.03179*/3180U_STABLE UBool U_EXPORT23181u_isIDIgnorable(UChar32 c);31823183/**3184* Determines if the specified character is permissible as the3185* first character in a Java identifier.3186* In addition to u_isIDStart(c), true for characters with3187* general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).3188*3189* Same as java.lang.Character.isJavaIdentifierStart().3190*3191* @param c the code point to be tested3192* @return TRUE if the code point may start a Java identifier3193*3194* @see u_isJavaIDPart3195* @see u_isalpha3196* @see u_isIDStart3197* @stable ICU 2.03198*/3199U_STABLE UBool U_EXPORT23200u_isJavaIDStart(UChar32 c);32013202/**3203* Determines if the specified character is permissible3204* in a Java identifier.3205* In addition to u_isIDPart(c), true for characters with3206* general category "Sc" (currency symbols).3207*3208* Same as java.lang.Character.isJavaIdentifierPart().3209*3210* @param c the code point to be tested3211* @return TRUE if the code point may occur in a Java identifier3212*3213* @see u_isIDIgnorable3214* @see u_isJavaIDStart3215* @see u_isalpha3216* @see u_isdigit3217* @see u_isIDPart3218* @stable ICU 2.03219*/3220U_STABLE UBool U_EXPORT23221u_isJavaIDPart(UChar32 c);32223223/**3224* The given character is mapped to its lowercase equivalent according to3225* UnicodeData.txt; if the character has no lowercase equivalent, the character3226* itself is returned.3227*3228* Same as java.lang.Character.toLowerCase().3229*3230* This function only returns the simple, single-code point case mapping.3231* Full case 
mappings should be used whenever possible because they produce3232* better results by working on whole strings.3233* They take into account the string context and the language and can map3234* to a result string with a different length as appropriate.3235* Full case mappings are applied by the string case mapping functions,3236* see ustring.h and the UnicodeString class.3237* See also the User Guide chapter on C/POSIX migration:3238* http://icu-project.org/userguide/posix.html#case_mappings3239*3240* @param c the code point to be mapped3241* @return the Simple_Lowercase_Mapping of the code point, if any;3242* otherwise the code point itself.3243* @stable ICU 2.03244*/3245U_STABLE UChar32 U_EXPORT23246u_tolower(UChar32 c);32473248/**3249* The given character is mapped to its uppercase equivalent according to UnicodeData.txt;3250* if the character has no uppercase equivalent, the character itself is3251* returned.3252*3253* Same as java.lang.Character.toUpperCase().3254*3255* This function only returns the simple, single-code point case mapping.3256* Full case mappings should be used whenever possible because they produce3257* better results by working on whole strings.3258* They take into account the string context and the language and can map3259* to a result string with a different length as appropriate.3260* Full case mappings are applied by the string case mapping functions,3261* see ustring.h and the UnicodeString class.3262* See also the User Guide chapter on C/POSIX migration:3263* http://icu-project.org/userguide/posix.html#case_mappings3264*3265* @param c the code point to be mapped3266* @return the Simple_Uppercase_Mapping of the code point, if any;3267* otherwise the code point itself.3268* @stable ICU 2.03269*/3270U_STABLE UChar32 U_EXPORT23271u_toupper(UChar32 c);32723273/**3274* The given character is mapped to its titlecase equivalent3275* according to UnicodeData.txt;3276* if none is defined, the character itself is returned.3277*3278* Same as 
java.lang.Character.toTitleCase().3279*3280* This function only returns the simple, single-code point case mapping.3281* Full case mappings should be used whenever possible because they produce3282* better results by working on whole strings.3283* They take into account the string context and the language and can map3284* to a result string with a different length as appropriate.3285* Full case mappings are applied by the string case mapping functions,3286* see ustring.h and the UnicodeString class.3287* See also the User Guide chapter on C/POSIX migration:3288* http://icu-project.org/userguide/posix.html#case_mappings3289*3290* @param c the code point to be mapped3291* @return the Simple_Titlecase_Mapping of the code point, if any;3292* otherwise the code point itself.3293* @stable ICU 2.03294*/3295U_STABLE UChar32 U_EXPORT23296u_totitle(UChar32 c);32973298/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */3299#define U_FOLD_CASE_DEFAULT 033003301/**3302* Option value for case folding:3303*3304* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I3305* and dotless i appropriately for Turkic languages (tr, az).3306*3307* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that3308* are to be included for default mappings and3309* excluded for the Turkic-specific mappings.3310*3311* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that3312* are to be excluded for default mappings and3313* included for the Turkic-specific mappings.3314*3315* @stable ICU 2.03316*/3317#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 133183319/**3320* The given character is mapped to its case folding equivalent according to3321* UnicodeData.txt and CaseFolding.txt;3322* if the character has no case folding equivalent, the character3323* itself is returned.3324*3325* This function only returns the simple, single-code point case mapping.3326* Full case mappings should be used whenever 
possible because they produce3327* better results by working on whole strings.3328* They take into account the string context and the language and can map3329* to a result string with a different length as appropriate.3330* Full case mappings are applied by the string case mapping functions,3331* see ustring.h and the UnicodeString class.3332* See also the User Guide chapter on C/POSIX migration:3333* http://icu-project.org/userguide/posix.html#case_mappings3334*3335* @param c the code point to be mapped3336* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I3337* @return the Simple_Case_Folding of the code point, if any;3338* otherwise the code point itself.3339* @stable ICU 2.03340*/3341U_STABLE UChar32 U_EXPORT23342u_foldCase(UChar32 c, uint32_t options);33433344/**3345* Returns the decimal digit value of the code point in the3346* specified radix.3347*3348* If the radix is not in the range <code>2<=radix<=36</code> or if the3349* value of <code>c</code> is not a valid digit in the specified3350* radix, <code>-1</code> is returned. 
A character is a valid digit3351* if at least one of the following is true:3352* <ul>3353* <li>The character has a decimal digit value.3354* Such characters have the general category "Nd" (decimal digit numbers)3355* and a Numeric_Type of Decimal.3356* In this case the value is the character's decimal digit value.</li>3357* <li>The character is one of the uppercase Latin letters3358* <code>'A'</code> through <code>'Z'</code>.3359* In this case the value is <code>c-'A'+10</code>.</li>3360* <li>The character is one of the lowercase Latin letters3361* <code>'a'</code> through <code>'z'</code>.3362* In this case the value is <code>ch-'a'+10</code>.</li>3363* <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)3364* as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)3365* are recognized.</li>3366* </ul>3367*3368* Same as java.lang.Character.digit().3369*3370* @param ch the code point to be tested.3371* @param radix the radix.3372* @return the numeric value represented by the character in the3373* specified radix,3374* or -1 if there is no value or if the value exceeds the radix.3375*3376* @see UCHAR_NUMERIC_TYPE3377* @see u_forDigit3378* @see u_charDigitValue3379* @see u_isdigit3380* @stable ICU 2.03381*/3382U_STABLE int32_t U_EXPORT23383u_digit(UChar32 ch, int8_t radix);33843385/**3386* Determines the character representation for a specific digit in3387* the specified radix. If the value of <code>radix</code> is not a3388* valid radix, or the value of <code>digit</code> is not a valid3389* digit in the specified radix, the null character3390* (<code>U+0000</code>) is returned.3391* <p>3392* The <code>radix</code> argument is valid if it is greater than or3393* equal to 2 and less than or equal to 36.3394* The <code>digit</code> argument is valid if3395* <code>0 <= digit < radix</code>.3396* <p>3397* If the digit is less than 10, then3398* <code>'0' + digit</code> is returned. 
Otherwise, the value3399* <code>'a' + digit - 10</code> is returned.3400*3401* Same as java.lang.Character.forDigit().3402*3403* @param digit the number to convert to a character.3404* @param radix the radix.3405* @return the <code>char</code> representation of the specified digit3406* in the specified radix.3407*3408* @see u_digit3409* @see u_charDigitValue3410* @see u_isdigit3411* @stable ICU 2.03412*/3413U_STABLE UChar32 U_EXPORT23414u_forDigit(int32_t digit, int8_t radix);34153416/**3417* Get the "age" of the code point.3418* The "age" is the Unicode version when the code point was first3419* designated (as a non-character or for Private Use)3420* or assigned a character.3421* This can be useful to avoid emitting code points to receiving3422* processes that do not accept newer characters.3423* The data is from the UCD file DerivedAge.txt.3424*3425* @param c The code point.3426* @param versionArray The Unicode version number array, to be filled in.3427*3428* @stable ICU 2.13429*/3430U_STABLE void U_EXPORT23431u_charAge(UChar32 c, UVersionInfo versionArray);34323433/**3434* Gets the Unicode version information.3435* The version array is filled in with the version information3436* for the Unicode standard that is currently used by ICU.3437* For example, Unicode version 3.1.1 is represented as an array with3438* the values { 3, 1, 1, 0 }.3439*3440* @param versionArray an output array that will be filled in with3441* the Unicode version number3442* @stable ICU 2.03443*/3444U_STABLE void U_EXPORT23445u_getUnicodeVersion(UVersionInfo versionArray);34463447#if !UCONFIG_NO_NORMALIZATION3448/**3449* Get the FC_NFKC_Closure property string for a character.3450* See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"3451* or for "FNC": http://www.unicode.org/reports/tr15/3452*3453* @param c The character (code point) for which to get the FC_NFKC_Closure string.3454* It must be <code>0<=c<=0x10ffff</code>.3455* @param dest Destination address for copying 
the string.3456* The string will be zero-terminated if possible.3457* If there is no FC_NFKC_Closure string,3458* then the buffer will be set to the empty string.3459* @param destCapacity <code>==sizeof(dest)</code>3460* @param pErrorCode Pointer to a UErrorCode variable.3461* @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.3462* If the destCapacity is less than or equal to the length, then the buffer3463* contains the truncated name and the returned length indicates the full3464* length of the name.3465* The length does not include the zero-termination.3466*3467* @stable ICU 2.23468*/3469U_STABLE int32_t U_EXPORT23470u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);34713472#endif347334743475U_CDECL_END34763477#endif /*_UCHAR*/3478/*eof*/347934803481