Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/casemap.h
38827 views
// © 2017 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html23// casemap.h4// created: 2017jan12 Markus W. Scherer56#ifndef __CASEMAP_H__7#define __CASEMAP_H__89#include "unicode/utypes.h"1011#if U_SHOW_CPLUSPLUS_API1213#include "unicode/stringpiece.h"14#include "unicode/uobject.h"1516/**17* \file18* \brief C++ API: Low-level C++ case mapping functions.19*/2021U_NAMESPACE_BEGIN2223class BreakIterator;24class ByteSink;25class Edits;2627/**28* Low-level C++ case mapping functions.29*30* @stable ICU 5931*/32class U_COMMON_API CaseMap U_FINAL : public UMemory {33public:34/**35* Lowercases a UTF-16 string and optionally records edits.36* Casing is locale-dependent and context-sensitive.37* The result may be longer or shorter than the original.38* The source string and the destination buffer must not overlap.39*40* @param locale The locale ID. ("" = root locale, NULL = default locale.)41* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.42* @param src The original string.43* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.44* @param dest A buffer for the result string. The result will be NUL-terminated if45* the buffer is large enough.46* The contents is undefined in case of failure.47* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then48* dest may be NULL and the function will only return the length of the result49* without writing any of the result string.50* @param edits Records edits for index mapping, working with styled text,51* and getting only changes (if any).52* The Edits contents is undefined if any error occurs.53* This function calls edits->reset() first unless54* options includes U_EDITS_NO_RESET. edits can be NULL.55* @param errorCode Reference to an in/out error code value56* which must not indicate a failure before the function call.57* @return The length of the result string, if successful.58* When the result would be longer than destCapacity,59* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.60*61* @see u_strToLower62* @stable ICU 5963*/64static int32_t toLower(65const char *locale, uint32_t options,66const char16_t *src, int32_t srcLength,67char16_t *dest, int32_t destCapacity, Edits *edits,68UErrorCode &errorCode);6970/**71* Uppercases a UTF-16 string and optionally records edits.72* Casing is locale-dependent and context-sensitive.73* The result may be longer or shorter than the original.74* The source string and the destination buffer must not overlap.75*76* @param locale The locale ID. ("" = root locale, NULL = default locale.)77* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.78* @param src The original string.79* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.80* @param dest A buffer for the result string. The result will be NUL-terminated if81* the buffer is large enough.82* The contents is undefined in case of failure.83* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then84* dest may be NULL and the function will only return the length of the result85* without writing any of the result string.86* @param edits Records edits for index mapping, working with styled text,87* and getting only changes (if any).88* The Edits contents is undefined if any error occurs.89* This function calls edits->reset() first unless90* options includes U_EDITS_NO_RESET. edits can be NULL.91* @param errorCode Reference to an in/out error code value92* which must not indicate a failure before the function call.93* @return The length of the result string, if successful.94* When the result would be longer than destCapacity,95* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.96*97* @see u_strToUpper98* @stable ICU 5999*/100static int32_t toUpper(101const char *locale, uint32_t options,102const char16_t *src, int32_t srcLength,103char16_t *dest, int32_t destCapacity, Edits *edits,104UErrorCode &errorCode);105106#if !UCONFIG_NO_BREAK_ITERATION107108/**109* Titlecases a UTF-16 string and optionally records edits.110* Casing is locale-dependent and context-sensitive.111* The result may be longer or shorter than the original.112* The source string and the destination buffer must not overlap.113*114* Titlecasing uses a break iterator to find the first characters of words115* that are to be titlecased. It titlecases those characters and lowercases116* all others. (This can be modified with options bits.)117*118* @param locale The locale ID. ("" = root locale, NULL = default locale.)119* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,120* U_TITLECASE_NO_LOWERCASE,121* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,122* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.123* @param iter A break iterator to find the first characters of words that are to be titlecased.124* It is set to the source string (setText())125* and used one or more times for iteration (first() and next()).126* If NULL, then a word break iterator for the locale is used127* (or something equivalent).128* @param src The original string.129* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.130* @param dest A buffer for the result string. The result will be NUL-terminated if131* the buffer is large enough.132* The contents is undefined in case of failure.133* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then134* dest may be NULL and the function will only return the length of the result135* without writing any of the result string.136* @param edits Records edits for index mapping, working with styled text,137* and getting only changes (if any).138* The Edits contents is undefined if any error occurs.139* This function calls edits->reset() first unless140* options includes U_EDITS_NO_RESET. edits can be NULL.141* @param errorCode Reference to an in/out error code value142* which must not indicate a failure before the function call.143* @return The length of the result string, if successful.144* When the result would be longer than destCapacity,145* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.146*147* @see u_strToTitle148* @see ucasemap_toTitle149* @stable ICU 59150*/151static int32_t toTitle(152const char *locale, uint32_t options, BreakIterator *iter,153const char16_t *src, int32_t srcLength,154char16_t *dest, int32_t destCapacity, Edits *edits,155UErrorCode &errorCode);156157#endif // UCONFIG_NO_BREAK_ITERATION158159/**160* Case-folds a UTF-16 string and optionally records edits.161*162* Case folding is locale-independent and not context-sensitive,163* but there is an option for whether to include or exclude mappings for dotted I164* and dotless i that are marked with 'T' in CaseFolding.txt.165*166* The result may be longer or shorter than the original.167* The source string and the destination buffer must not overlap.168*169* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,170* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.171* @param src The original string.172* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.173* @param dest A buffer for the result string. The result will be NUL-terminated if174* the buffer is large enough.175* The contents is undefined in case of failure.176* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then177* dest may be NULL and the function will only return the length of the result178* without writing any of the result string.179* @param edits Records edits for index mapping, working with styled text,180* and getting only changes (if any).181* The Edits contents is undefined if any error occurs.182* This function calls edits->reset() first unless183* options includes U_EDITS_NO_RESET. edits can be NULL.184* @param errorCode Reference to an in/out error code value185* which must not indicate a failure before the function call.186* @return The length of the result string, if successful.187* When the result would be longer than destCapacity,188* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.189*190* @see u_strFoldCase191* @stable ICU 59192*/193static int32_t fold(194uint32_t options,195const char16_t *src, int32_t srcLength,196char16_t *dest, int32_t destCapacity, Edits *edits,197UErrorCode &errorCode);198199/**200* Lowercases a UTF-8 string and optionally records edits.201* Casing is locale-dependent and context-sensitive.202* The result may be longer or shorter than the original.203*204* @param locale The locale ID. ("" = root locale, NULL = default locale.)205* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.206* @param src The original string.207* @param sink A ByteSink to which the result string is written.208* sink.Flush() is called at the end.209* @param edits Records edits for index mapping, working with styled text,210* and getting only changes (if any).211* The Edits contents is undefined if any error occurs.212* This function calls edits->reset() first unless213* options includes U_EDITS_NO_RESET. edits can be NULL.214* @param errorCode Reference to an in/out error code value215* which must not indicate a failure before the function call.216*217* @see ucasemap_utf8ToLower218* @stable ICU 60219*/220static void utf8ToLower(221const char *locale, uint32_t options,222StringPiece src, ByteSink &sink, Edits *edits,223UErrorCode &errorCode);224225/**226* Uppercases a UTF-8 string and optionally records edits.227* Casing is locale-dependent and context-sensitive.228* The result may be longer or shorter than the original.229*230* @param locale The locale ID. ("" = root locale, NULL = default locale.)231* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.232* @param src The original string.233* @param sink A ByteSink to which the result string is written.234* sink.Flush() is called at the end.235* @param edits Records edits for index mapping, working with styled text,236* and getting only changes (if any).237* The Edits contents is undefined if any error occurs.238* This function calls edits->reset() first unless239* options includes U_EDITS_NO_RESET. edits can be NULL.240* @param errorCode Reference to an in/out error code value241* which must not indicate a failure before the function call.242*243* @see ucasemap_utf8ToUpper244* @stable ICU 60245*/246static void utf8ToUpper(247const char *locale, uint32_t options,248StringPiece src, ByteSink &sink, Edits *edits,249UErrorCode &errorCode);250251#if !UCONFIG_NO_BREAK_ITERATION252253/**254* Titlecases a UTF-8 string and optionally records edits.255* Casing is locale-dependent and context-sensitive.256* The result may be longer or shorter than the original.257*258* Titlecasing uses a break iterator to find the first characters of words259* that are to be titlecased. It titlecases those characters and lowercases260* all others. (This can be modified with options bits.)261*262* @param locale The locale ID. ("" = root locale, NULL = default locale.)263* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,264* U_TITLECASE_NO_LOWERCASE,265* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,266* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.267* @param iter A break iterator to find the first characters of words that are to be titlecased.268* It is set to the source string (setUText())269* and used one or more times for iteration (first() and next()).270* If NULL, then a word break iterator for the locale is used271* (or something equivalent).272* @param src The original string.273* @param sink A ByteSink to which the result string is written.274* sink.Flush() is called at the end.275* @param edits Records edits for index mapping, working with styled text,276* and getting only changes (if any).277* The Edits contents is undefined if any error occurs.278* This function calls edits->reset() first unless279* options includes U_EDITS_NO_RESET. edits can be NULL.280* @param errorCode Reference to an in/out error code value281* which must not indicate a failure before the function call.282*283* @see ucasemap_utf8ToTitle284* @stable ICU 60285*/286static void utf8ToTitle(287const char *locale, uint32_t options, BreakIterator *iter,288StringPiece src, ByteSink &sink, Edits *edits,289UErrorCode &errorCode);290291#endif // UCONFIG_NO_BREAK_ITERATION292293/**294* Case-folds a UTF-8 string and optionally records edits.295*296* Case folding is locale-independent and not context-sensitive,297* but there is an option for whether to include or exclude mappings for dotted I298* and dotless i that are marked with 'T' in CaseFolding.txt.299*300* The result may be longer or shorter than the original.301*302* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.303* @param src The original string.304* @param sink A ByteSink to which the result string is written.305* sink.Flush() is called at the end.306* @param edits Records edits for index mapping, working with styled text,307* and getting only changes (if any).308* The Edits contents is undefined if any error occurs.309* This function calls edits->reset() first unless310* options includes U_EDITS_NO_RESET. edits can be NULL.311* @param errorCode Reference to an in/out error code value312* which must not indicate a failure before the function call.313*314* @see ucasemap_utf8FoldCase315* @stable ICU 60316*/317static void utf8Fold(318uint32_t options,319StringPiece src, ByteSink &sink, Edits *edits,320UErrorCode &errorCode);321322/**323* Lowercases a UTF-8 string and optionally records edits.324* Casing is locale-dependent and context-sensitive.325* The result may be longer or shorter than the original.326* The source string and the destination buffer must not overlap.327*328* @param locale The locale ID. ("" = root locale, NULL = default locale.)329* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.330* @param src The original string.331* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.332* @param dest A buffer for the result string. The result will be NUL-terminated if333* the buffer is large enough.334* The contents is undefined in case of failure.335* @param destCapacity The size of the buffer (number of bytes). If it is 0, then336* dest may be NULL and the function will only return the length of the result337* without writing any of the result string.338* @param edits Records edits for index mapping, working with styled text,339* and getting only changes (if any).340* The Edits contents is undefined if any error occurs.341* This function calls edits->reset() first unless342* options includes U_EDITS_NO_RESET. edits can be NULL.343* @param errorCode Reference to an in/out error code value344* which must not indicate a failure before the function call.345* @return The length of the result string, if successful.346* When the result would be longer than destCapacity,347* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.348*349* @see ucasemap_utf8ToLower350* @stable ICU 59351*/352static int32_t utf8ToLower(353const char *locale, uint32_t options,354const char *src, int32_t srcLength,355char *dest, int32_t destCapacity, Edits *edits,356UErrorCode &errorCode);357358/**359* Uppercases a UTF-8 string and optionally records edits.360* Casing is locale-dependent and context-sensitive.361* The result may be longer or shorter than the original.362* The source string and the destination buffer must not overlap.363*364* @param locale The locale ID. ("" = root locale, NULL = default locale.)365* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.366* @param src The original string.367* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.368* @param dest A buffer for the result string. The result will be NUL-terminated if369* the buffer is large enough.370* The contents is undefined in case of failure.371* @param destCapacity The size of the buffer (number of bytes). If it is 0, then372* dest may be NULL and the function will only return the length of the result373* without writing any of the result string.374* @param edits Records edits for index mapping, working with styled text,375* and getting only changes (if any).376* The Edits contents is undefined if any error occurs.377* This function calls edits->reset() first unless378* options includes U_EDITS_NO_RESET. edits can be NULL.379* @param errorCode Reference to an in/out error code value380* which must not indicate a failure before the function call.381* @return The length of the result string, if successful.382* When the result would be longer than destCapacity,383* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.384*385* @see ucasemap_utf8ToUpper386* @stable ICU 59387*/388static int32_t utf8ToUpper(389const char *locale, uint32_t options,390const char *src, int32_t srcLength,391char *dest, int32_t destCapacity, Edits *edits,392UErrorCode &errorCode);393394#if !UCONFIG_NO_BREAK_ITERATION395396/**397* Titlecases a UTF-8 string and optionally records edits.398* Casing is locale-dependent and context-sensitive.399* The result may be longer or shorter than the original.400* The source string and the destination buffer must not overlap.401*402* Titlecasing uses a break iterator to find the first characters of words403* that are to be titlecased. It titlecases those characters and lowercases404* all others. (This can be modified with options bits.)405*406* @param locale The locale ID. ("" = root locale, NULL = default locale.)407* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,408* U_TITLECASE_NO_LOWERCASE,409* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,410* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.411* @param iter A break iterator to find the first characters of words that are to be titlecased.412* It is set to the source string (setUText())413* and used one or more times for iteration (first() and next()).414* If NULL, then a word break iterator for the locale is used415* (or something equivalent).416* @param src The original string.417* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.418* @param dest A buffer for the result string. The result will be NUL-terminated if419* the buffer is large enough.420* The contents is undefined in case of failure.421* @param destCapacity The size of the buffer (number of bytes). If it is 0, then422* dest may be NULL and the function will only return the length of the result423* without writing any of the result string.424* @param edits Records edits for index mapping, working with styled text,425* and getting only changes (if any).426* The Edits contents is undefined if any error occurs.427* This function calls edits->reset() first unless428* options includes U_EDITS_NO_RESET. edits can be NULL.429* @param errorCode Reference to an in/out error code value430* which must not indicate a failure before the function call.431* @return The length of the result string, if successful.432* When the result would be longer than destCapacity,433* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.434*435* @see ucasemap_utf8ToTitle436* @stable ICU 59437*/438static int32_t utf8ToTitle(439const char *locale, uint32_t options, BreakIterator *iter,440const char *src, int32_t srcLength,441char *dest, int32_t destCapacity, Edits *edits,442UErrorCode &errorCode);443444#endif // UCONFIG_NO_BREAK_ITERATION445446/**447* Case-folds a UTF-8 string and optionally records edits.448*449* Case folding is locale-independent and not context-sensitive,450* but there is an option for whether to include or exclude mappings for dotted I451* and dotless i that are marked with 'T' in CaseFolding.txt.452*453* The result may be longer or shorter than the original.454* The source string and the destination buffer must not overlap.455*456* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,457* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.458* @param src The original string.459* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.460* @param dest A buffer for the result string. The result will be NUL-terminated if461* the buffer is large enough.462* The contents is undefined in case of failure.463* @param destCapacity The size of the buffer (number of bytes). If it is 0, then464* dest may be NULL and the function will only return the length of the result465* without writing any of the result string.466* @param edits Records edits for index mapping, working with styled text,467* and getting only changes (if any).468* The Edits contents is undefined if any error occurs.469* This function calls edits->reset() first unless470* options includes U_EDITS_NO_RESET. edits can be NULL.471* @param errorCode Reference to an in/out error code value472* which must not indicate a failure before the function call.473* @return The length of the result string, if successful.474* When the result would be longer than destCapacity,475* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.476*477* @see ucasemap_utf8FoldCase478* @stable ICU 59479*/480static int32_t utf8Fold(481uint32_t options,482const char *src, int32_t srcLength,483char *dest, int32_t destCapacity, Edits *edits,484UErrorCode &errorCode);485486private:487CaseMap() = delete;488CaseMap(const CaseMap &other) = delete;489CaseMap &operator=(const CaseMap &other) = delete;490};491492U_NAMESPACE_END493494#endif /* U_SHOW_CPLUSPLUS_API */495496#endif // __CASEMAP_H__497498499