Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/ucasemap.h
38827 views
/*1*******************************************************************************2*3* Copyright (C) 2005-2012, International Business Machines4* Corporation and others. All Rights Reserved.5*6*******************************************************************************7* file name: ucasemap.h8* encoding: US-ASCII9* tab size: 8 (not used)10* indentation:411*12* created on: 2005may0613* created by: Markus W. Scherer14*15* Case mapping service object and functions using it.16*/1718#ifndef __UCASEMAP_H__19#define __UCASEMAP_H__2021#include "unicode/utypes.h"22#include "unicode/ustring.h"23#include "unicode/localpointer.h"2425/**26* \file27* \brief C API: Unicode case mapping functions using a UCaseMap service object.28*29* The service object takes care of memory allocations, data loading, and setup30* for the attributes, as usual.31*32* Currently, the functionality provided here does not overlap with uchar.h33* and ustring.h, except for ucasemap_toTitle().34*35* ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.36*/3738/**39* UCaseMap is an opaque service object for newer ICU case mapping functions.40* Older functions did not use a service object.41* @stable ICU 3.442*/43struct UCaseMap;44typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */4546/**47* Open a UCaseMap service object for a locale and a set of options.48* The locale ID and options are preprocessed so that functions using the49* service object need not process them in each call.50*51* @param locale ICU locale ID, used for language-dependent52* upper-/lower-/title-casing according to the Unicode standard.53* Usual semantics: ""=root, NULL=default locale, etc.54* @param options Options bit set, used for case folding and string comparisons.55* Same flags as for u_foldCase(), u_strFoldCase(),56* u_strCaseCompare(), etc.57* Use 0 or U_FOLD_CASE_DEFAULT for default behavior.58* @param pErrorCode Must be a valid pointer to an error code value,59* which must not indicate a failure before the function call.60* @return Pointer to a UCaseMap service object, if successful.61*62* @see U_FOLD_CASE_DEFAULT63* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I64* @see U_TITLECASE_NO_LOWERCASE65* @see U_TITLECASE_NO_BREAK_ADJUSTMENT66* @stable ICU 3.467*/68U_STABLE UCaseMap * U_EXPORT269ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);7071/**72* Close a UCaseMap service object.73* @param csm Object to be closed.74* @stable ICU 3.475*/76U_STABLE void U_EXPORT277ucasemap_close(UCaseMap *csm);7879#if U_SHOW_CPLUSPLUS_API8081U_NAMESPACE_BEGIN8283/**84* \class LocalUCaseMapPointer85* "Smart pointer" class, closes a UCaseMap via ucasemap_close().86* For most methods see the LocalPointerBase base class.87*88* @see LocalPointerBase89* @see LocalPointer90* @stable ICU 4.491*/92U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);9394U_NAMESPACE_END9596#endif9798/**99* Get the locale ID that is used for language-dependent case mappings.100* @param csm UCaseMap service object.101* @return locale ID102* @stable ICU 3.4103*/104U_STABLE const char * U_EXPORT2105ucasemap_getLocale(const UCaseMap *csm);106107/**108* Get the options bit set that is used for case folding and string comparisons.109* @param csm UCaseMap service object.110* @return options bit set111* @stable ICU 3.4112*/113U_STABLE uint32_t U_EXPORT2114ucasemap_getOptions(const UCaseMap *csm);115116/**117* Set the locale ID that is used for language-dependent case mappings.118*119* @param csm UCaseMap service object.120* @param locale Locale ID, see ucasemap_open().121* @param pErrorCode Must be a valid pointer to an error code value,122* which must not indicate a failure before the function call.123*124* @see ucasemap_open125* @stable ICU 3.4126*/127U_STABLE void U_EXPORT2128ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);129130/**131* Set the options bit set that is used for case folding and string comparisons.132*133* @param csm UCaseMap service object.134* @param options Options bit set, see ucasemap_open().135* @param pErrorCode Must be a valid pointer to an error code value,136* which must not indicate a failure before the function call.137*138* @see ucasemap_open139* @stable ICU 3.4140*/141U_STABLE void U_EXPORT2142ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);143144/**145* Do not lowercase non-initial parts of words when titlecasing.146* Option bit for titlecasing APIs that take an options bit set.147*148* By default, titlecasing will titlecase the first cased character149* of a word and lowercase all other characters.150* With this option, the other characters will not be modified.151*152* @see ucasemap_setOptions153* @see ucasemap_toTitle154* @see ucasemap_utf8ToTitle155* @see UnicodeString::toTitle156* @stable ICU 3.8157*/158#define U_TITLECASE_NO_LOWERCASE 0x100159160/**161* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;162* titlecase exactly the characters at breaks from the iterator.163* Option bit for titlecasing APIs that take an options bit set.164*165* By default, titlecasing will take each break iterator index,166* adjust it by looking for the next cased character, and titlecase that one.167* Other characters are lowercased.168*169* This follows Unicode 4 & 5 section 3.13 Default Case Operations:170*171* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex172* #29, "Text Boundaries." Between each pair of word boundaries, find the first173* cased character F. If F exists, map F to default_title(F); then map each174* subsequent character C to default_lower(C).175*176* @see ucasemap_setOptions177* @see ucasemap_toTitle178* @see ucasemap_utf8ToTitle179* @see UnicodeString::toTitle180* @see U_TITLECASE_NO_LOWERCASE181* @stable ICU 3.8182*/183#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200184185#if !UCONFIG_NO_BREAK_ITERATION186187/**188* Get the break iterator that is used for titlecasing.189* Do not modify the returned break iterator.190* @param csm UCaseMap service object.191* @return titlecasing break iterator192* @stable ICU 3.8193*/194U_STABLE const UBreakIterator * U_EXPORT2195ucasemap_getBreakIterator(const UCaseMap *csm);196197/**198* Set the break iterator that is used for titlecasing.199* The UCaseMap service object releases a previously set break iterator200* and "adopts" this new one, taking ownership of it.201* It will be released in a subsequent call to ucasemap_setBreakIterator()202* or ucasemap_close().203*204* Break iterator operations are not thread-safe. Therefore, titlecasing205* functions use non-const UCaseMap objects. It is not possible to titlecase206* strings concurrently using the same UCaseMap.207*208* @param csm UCaseMap service object.209* @param iterToAdopt Break iterator to be adopted for titlecasing.210* @param pErrorCode Must be a valid pointer to an error code value,211* which must not indicate a failure before the function call.212*213* @see ucasemap_toTitle214* @see ucasemap_utf8ToTitle215* @stable ICU 3.8216*/217U_STABLE void U_EXPORT2218ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);219220/**221* Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),222* except that it takes ucasemap_setOptions() into account and has performance223* advantages from being able to use a UCaseMap object for multiple case mapping224* operations, saving setup time.225*226* Casing is locale-dependent and context-sensitive.227* Titlecasing uses a break iterator to find the first characters of words228* that are to be titlecased. It titlecases those characters and lowercases229* all others. (This can be modified with ucasemap_setOptions().)230*231* Note: This function takes a non-const UCaseMap pointer because it will232* open a default break iterator if no break iterator was set yet,233* and effectively call ucasemap_setBreakIterator();234* also because the break iterator is stateful and will be modified during235* the iteration.236*237* The titlecase break iterator can be provided to customize for arbitrary238* styles, using rules and dictionaries beyond the standard iterators.239* The standard titlecase iterator for the root locale implements the240* algorithm of Unicode TR 21.241*242* This function uses only the setUText(), first(), next() and close() methods of the243* provided break iterator.244*245* The result may be longer or shorter than the original.246* The source string and the destination buffer must not overlap.247*248* @param csm UCaseMap service object. This pointer is non-const!249* See the note above for details.250* @param dest A buffer for the result string. The result will be NUL-terminated if251* the buffer is large enough.252* The contents is undefined in case of failure.253* @param destCapacity The size of the buffer (number of bytes). If it is 0, then254* dest may be NULL and the function will only return the length of the result255* without writing any of the result string.256* @param src The original string.257* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.258* @param pErrorCode Must be a valid pointer to an error code value,259* which must not indicate a failure before the function call.260* @return The length of the result string, if successful - or in case of a buffer overflow,261* in which case it will be greater than destCapacity.262*263* @see u_strToTitle264* @stable ICU 3.8265*/266U_STABLE int32_t U_EXPORT2267ucasemap_toTitle(UCaseMap *csm,268UChar *dest, int32_t destCapacity,269const UChar *src, int32_t srcLength,270UErrorCode *pErrorCode);271272#endif273274/**275* Lowercase the characters in a UTF-8 string.276* Casing is locale-dependent and context-sensitive.277* The result may be longer or shorter than the original.278* The source string and the destination buffer must not overlap.279*280* @param csm UCaseMap service object.281* @param dest A buffer for the result string. The result will be NUL-terminated if282* the buffer is large enough.283* The contents is undefined in case of failure.284* @param destCapacity The size of the buffer (number of bytes). If it is 0, then285* dest may be NULL and the function will only return the length of the result286* without writing any of the result string.287* @param src The original string.288* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.289* @param pErrorCode Must be a valid pointer to an error code value,290* which must not indicate a failure before the function call.291* @return The length of the result string, if successful - or in case of a buffer overflow,292* in which case it will be greater than destCapacity.293*294* @see u_strToLower295* @stable ICU 3.4296*/297U_STABLE int32_t U_EXPORT2298ucasemap_utf8ToLower(const UCaseMap *csm,299char *dest, int32_t destCapacity,300const char *src, int32_t srcLength,301UErrorCode *pErrorCode);302303/**304* Uppercase the characters in a UTF-8 string.305* Casing is locale-dependent and context-sensitive.306* The result may be longer or shorter than the original.307* The source string and the destination buffer must not overlap.308*309* @param csm UCaseMap service object.310* @param dest A buffer for the result string. The result will be NUL-terminated if311* the buffer is large enough.312* The contents is undefined in case of failure.313* @param destCapacity The size of the buffer (number of bytes). If it is 0, then314* dest may be NULL and the function will only return the length of the result315* without writing any of the result string.316* @param src The original string.317* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.318* @param pErrorCode Must be a valid pointer to an error code value,319* which must not indicate a failure before the function call.320* @return The length of the result string, if successful - or in case of a buffer overflow,321* in which case it will be greater than destCapacity.322*323* @see u_strToUpper324* @stable ICU 3.4325*/326U_STABLE int32_t U_EXPORT2327ucasemap_utf8ToUpper(const UCaseMap *csm,328char *dest, int32_t destCapacity,329const char *src, int32_t srcLength,330UErrorCode *pErrorCode);331332#if !UCONFIG_NO_BREAK_ITERATION333334/**335* Titlecase a UTF-8 string.336* Casing is locale-dependent and context-sensitive.337* Titlecasing uses a break iterator to find the first characters of words338* that are to be titlecased. It titlecases those characters and lowercases339* all others. (This can be modified with ucasemap_setOptions().)340*341* Note: This function takes a non-const UCaseMap pointer because it will342* open a default break iterator if no break iterator was set yet,343* and effectively call ucasemap_setBreakIterator();344* also because the break iterator is stateful and will be modified during345* the iteration.346*347* The titlecase break iterator can be provided to customize for arbitrary348* styles, using rules and dictionaries beyond the standard iterators.349* The standard titlecase iterator for the root locale implements the350* algorithm of Unicode TR 21.351*352* This function uses only the setUText(), first(), next() and close() methods of the353* provided break iterator.354*355* The result may be longer or shorter than the original.356* The source string and the destination buffer must not overlap.357*358* @param csm UCaseMap service object. This pointer is non-const!359* See the note above for details.360* @param dest A buffer for the result string. The result will be NUL-terminated if361* the buffer is large enough.362* The contents is undefined in case of failure.363* @param destCapacity The size of the buffer (number of bytes). If it is 0, then364* dest may be NULL and the function will only return the length of the result365* without writing any of the result string.366* @param src The original string.367* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.368* @param pErrorCode Must be a valid pointer to an error code value,369* which must not indicate a failure before the function call.370* @return The length of the result string, if successful - or in case of a buffer overflow,371* in which case it will be greater than destCapacity.372*373* @see u_strToTitle374* @see U_TITLECASE_NO_LOWERCASE375* @see U_TITLECASE_NO_BREAK_ADJUSTMENT376* @stable ICU 3.8377*/378U_STABLE int32_t U_EXPORT2379ucasemap_utf8ToTitle(UCaseMap *csm,380char *dest, int32_t destCapacity,381const char *src, int32_t srcLength,382UErrorCode *pErrorCode);383384#endif385386/**387* Case-folds the characters in a UTF-8 string.388*389* Case-folding is locale-independent and not context-sensitive,390* but there is an option for whether to include or exclude mappings for dotted I391* and dotless i that are marked with 'T' in CaseFolding.txt.392*393* The result may be longer or shorter than the original.394* The source string and the destination buffer must not overlap.395*396* @param csm UCaseMap service object.397* @param dest A buffer for the result string. The result will be NUL-terminated if398* the buffer is large enough.399* The contents is undefined in case of failure.400* @param destCapacity The size of the buffer (number of bytes). If it is 0, then401* dest may be NULL and the function will only return the length of the result402* without writing any of the result string.403* @param src The original string.404* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.405* @param pErrorCode Must be a valid pointer to an error code value,406* which must not indicate a failure before the function call.407* @return The length of the result string, if successful - or in case of a buffer overflow,408* in which case it will be greater than destCapacity.409*410* @see u_strFoldCase411* @see ucasemap_setOptions412* @see U_FOLD_CASE_DEFAULT413* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I414* @stable ICU 3.8415*/416U_STABLE int32_t U_EXPORT2417ucasemap_utf8FoldCase(const UCaseMap *csm,418char *dest, int32_t destCapacity,419const char *src, int32_t srcLength,420UErrorCode *pErrorCode);421422#endif423424425