// Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/casemap.h
// 48773 views
// © 2017 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html23// casemap.h4// created: 2017jan12 Markus W. Scherer56#ifndef __CASEMAP_H__7#define __CASEMAP_H__89#include "unicode/utypes.h"10#include "unicode/stringpiece.h"11#include "unicode/uobject.h"1213/**14* \file15* \brief C++ API: Low-level C++ case mapping functions.16*/1718U_NAMESPACE_BEGIN1920class BreakIterator;21class ByteSink;22class Edits;2324/**25* Low-level C++ case mapping functions.26*27* @stable ICU 5928*/29class U_COMMON_API CaseMap U_FINAL : public UMemory {30public:31/**32* Lowercases a UTF-16 string and optionally records edits.33* Casing is locale-dependent and context-sensitive.34* The result may be longer or shorter than the original.35* The source string and the destination buffer must not overlap.36*37* @param locale The locale ID. ("" = root locale, NULL = default locale.)38* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.39* @param src The original string.40* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.41* @param dest A buffer for the result string. The result will be NUL-terminated if42* the buffer is large enough.43* The contents is undefined in case of failure.44* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then45* dest may be NULL and the function will only return the length of the result46* without writing any of the result string.47* @param edits Records edits for index mapping, working with styled text,48* and getting only changes (if any).49* The Edits contents is undefined if any error occurs.50* This function calls edits->reset() first unless51* options includes U_EDITS_NO_RESET. 
edits can be NULL.52* @param errorCode Reference to an in/out error code value53* which must not indicate a failure before the function call.54* @return The length of the result string, if successful.55* When the result would be longer than destCapacity,56* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.57*58* @see u_strToLower59* @stable ICU 5960*/61static int32_t toLower(62const char *locale, uint32_t options,63const char16_t *src, int32_t srcLength,64char16_t *dest, int32_t destCapacity, Edits *edits,65UErrorCode &errorCode);6667/**68* Uppercases a UTF-16 string and optionally records edits.69* Casing is locale-dependent and context-sensitive.70* The result may be longer or shorter than the original.71* The source string and the destination buffer must not overlap.72*73* @param locale The locale ID. ("" = root locale, NULL = default locale.)74* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.75* @param src The original string.76* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.77* @param dest A buffer for the result string. The result will be NUL-terminated if78* the buffer is large enough.79* The contents is undefined in case of failure.80* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then81* dest may be NULL and the function will only return the length of the result82* without writing any of the result string.83* @param edits Records edits for index mapping, working with styled text,84* and getting only changes (if any).85* The Edits contents is undefined if any error occurs.86* This function calls edits->reset() first unless87* options includes U_EDITS_NO_RESET. 
edits can be NULL.88* @param errorCode Reference to an in/out error code value89* which must not indicate a failure before the function call.90* @return The length of the result string, if successful.91* When the result would be longer than destCapacity,92* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.93*94* @see u_strToUpper95* @stable ICU 5996*/97static int32_t toUpper(98const char *locale, uint32_t options,99const char16_t *src, int32_t srcLength,100char16_t *dest, int32_t destCapacity, Edits *edits,101UErrorCode &errorCode);102103#if !UCONFIG_NO_BREAK_ITERATION104105/**106* Titlecases a UTF-16 string and optionally records edits.107* Casing is locale-dependent and context-sensitive.108* The result may be longer or shorter than the original.109* The source string and the destination buffer must not overlap.110*111* Titlecasing uses a break iterator to find the first characters of words112* that are to be titlecased. It titlecases those characters and lowercases113* all others. (This can be modified with options bits.)114*115* @param locale The locale ID. ("" = root locale, NULL = default locale.)116* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,117* U_TITLECASE_NO_LOWERCASE,118* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,119* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.120* @param iter A break iterator to find the first characters of words that are to be titlecased.121* It is set to the source string (setText())122* and used one or more times for iteration (first() and next()).123* If NULL, then a word break iterator for the locale is used124* (or something equivalent).125* @param src The original string.126* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.127* @param dest A buffer for the result string. 
The result will be NUL-terminated if128* the buffer is large enough.129* The contents is undefined in case of failure.130* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then131* dest may be NULL and the function will only return the length of the result132* without writing any of the result string.133* @param edits Records edits for index mapping, working with styled text,134* and getting only changes (if any).135* The Edits contents is undefined if any error occurs.136* This function calls edits->reset() first unless137* options includes U_EDITS_NO_RESET. edits can be NULL.138* @param errorCode Reference to an in/out error code value139* which must not indicate a failure before the function call.140* @return The length of the result string, if successful.141* When the result would be longer than destCapacity,142* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.143*144* @see u_strToTitle145* @see ucasemap_toTitle146* @stable ICU 59147*/148static int32_t toTitle(149const char *locale, uint32_t options, BreakIterator *iter,150const char16_t *src, int32_t srcLength,151char16_t *dest, int32_t destCapacity, Edits *edits,152UErrorCode &errorCode);153154#endif // UCONFIG_NO_BREAK_ITERATION155156/**157* Case-folds a UTF-16 string and optionally records edits.158*159* Case folding is locale-independent and not context-sensitive,160* but there is an option for whether to include or exclude mappings for dotted I161* and dotless i that are marked with 'T' in CaseFolding.txt.162*163* The result may be longer or shorter than the original.164* The source string and the destination buffer must not overlap.165*166* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,167* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.168* @param src The original string.169* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.170* @param dest A buffer for the result string. 
The result will be NUL-terminated if171* the buffer is large enough.172* The contents is undefined in case of failure.173* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then174* dest may be NULL and the function will only return the length of the result175* without writing any of the result string.176* @param edits Records edits for index mapping, working with styled text,177* and getting only changes (if any).178* The Edits contents is undefined if any error occurs.179* This function calls edits->reset() first unless180* options includes U_EDITS_NO_RESET. edits can be NULL.181* @param errorCode Reference to an in/out error code value182* which must not indicate a failure before the function call.183* @return The length of the result string, if successful.184* When the result would be longer than destCapacity,185* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.186*187* @see u_strFoldCase188* @stable ICU 59189*/190static int32_t fold(191uint32_t options,192const char16_t *src, int32_t srcLength,193char16_t *dest, int32_t destCapacity, Edits *edits,194UErrorCode &errorCode);195196/**197* Lowercases a UTF-8 string and optionally records edits.198* Casing is locale-dependent and context-sensitive.199* The result may be longer or shorter than the original.200*201* @param locale The locale ID. ("" = root locale, NULL = default locale.)202* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.203* @param src The original string.204* @param sink A ByteSink to which the result string is written.205* sink.Flush() is called at the end.206* @param edits Records edits for index mapping, working with styled text,207* and getting only changes (if any).208* The Edits contents is undefined if any error occurs.209* This function calls edits->reset() first unless210* options includes U_EDITS_NO_RESET. 
edits can be NULL.211* @param errorCode Reference to an in/out error code value212* which must not indicate a failure before the function call.213*214* @see ucasemap_utf8ToLower215* @stable ICU 60216*/217static void utf8ToLower(218const char *locale, uint32_t options,219StringPiece src, ByteSink &sink, Edits *edits,220UErrorCode &errorCode);221222/**223* Uppercases a UTF-8 string and optionally records edits.224* Casing is locale-dependent and context-sensitive.225* The result may be longer or shorter than the original.226*227* @param locale The locale ID. ("" = root locale, NULL = default locale.)228* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.229* @param src The original string.230* @param sink A ByteSink to which the result string is written.231* sink.Flush() is called at the end.232* @param edits Records edits for index mapping, working with styled text,233* and getting only changes (if any).234* The Edits contents is undefined if any error occurs.235* This function calls edits->reset() first unless236* options includes U_EDITS_NO_RESET. edits can be NULL.237* @param errorCode Reference to an in/out error code value238* which must not indicate a failure before the function call.239*240* @see ucasemap_utf8ToUpper241* @stable ICU 60242*/243static void utf8ToUpper(244const char *locale, uint32_t options,245StringPiece src, ByteSink &sink, Edits *edits,246UErrorCode &errorCode);247248#if !UCONFIG_NO_BREAK_ITERATION249250/**251* Titlecases a UTF-8 string and optionally records edits.252* Casing is locale-dependent and context-sensitive.253* The result may be longer or shorter than the original.254*255* Titlecasing uses a break iterator to find the first characters of words256* that are to be titlecased. It titlecases those characters and lowercases257* all others. (This can be modified with options bits.)258*259* @param locale The locale ID. 
("" = root locale, NULL = default locale.)260* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,261* U_TITLECASE_NO_LOWERCASE,262* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,263* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.264* @param iter A break iterator to find the first characters of words that are to be titlecased.265* It is set to the source string (setUText())266* and used one or more times for iteration (first() and next()).267* If NULL, then a word break iterator for the locale is used268* (or something equivalent).269* @param src The original string.270* @param sink A ByteSink to which the result string is written.271* sink.Flush() is called at the end.272* @param edits Records edits for index mapping, working with styled text,273* and getting only changes (if any).274* The Edits contents is undefined if any error occurs.275* This function calls edits->reset() first unless276* options includes U_EDITS_NO_RESET. edits can be NULL.277* @param errorCode Reference to an in/out error code value278* which must not indicate a failure before the function call.279*280* @see ucasemap_utf8ToTitle281* @stable ICU 60282*/283static void utf8ToTitle(284const char *locale, uint32_t options, BreakIterator *iter,285StringPiece src, ByteSink &sink, Edits *edits,286UErrorCode &errorCode);287288#endif // UCONFIG_NO_BREAK_ITERATION289290/**291* Case-folds a UTF-8 string and optionally records edits.292*293* Case folding is locale-independent and not context-sensitive,294* but there is an option for whether to include or exclude mappings for dotted I295* and dotless i that are marked with 'T' in CaseFolding.txt.296*297* The result may be longer or shorter than the original.298*299* @param options Options bit set, usually 0. 
See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.300* @param src The original string.301* @param sink A ByteSink to which the result string is written.302* sink.Flush() is called at the end.303* @param edits Records edits for index mapping, working with styled text,304* and getting only changes (if any).305* The Edits contents is undefined if any error occurs.306* This function calls edits->reset() first unless307* options includes U_EDITS_NO_RESET. edits can be NULL.308* @param errorCode Reference to an in/out error code value309* which must not indicate a failure before the function call.310*311* @see ucasemap_utf8FoldCase312* @stable ICU 60313*/314static void utf8Fold(315uint32_t options,316StringPiece src, ByteSink &sink, Edits *edits,317UErrorCode &errorCode);318319/**320* Lowercases a UTF-8 string and optionally records edits.321* Casing is locale-dependent and context-sensitive.322* The result may be longer or shorter than the original.323* The source string and the destination buffer must not overlap.324*325* @param locale The locale ID. ("" = root locale, NULL = default locale.)326* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.327* @param src The original string.328* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.329* @param dest A buffer for the result string. The result will be NUL-terminated if330* the buffer is large enough.331* The contents is undefined in case of failure.332* @param destCapacity The size of the buffer (number of bytes). If it is 0, then333* dest may be NULL and the function will only return the length of the result334* without writing any of the result string.335* @param edits Records edits for index mapping, working with styled text,336* and getting only changes (if any).337* The Edits contents is undefined if any error occurs.338* This function calls edits->reset() first unless339* options includes U_EDITS_NO_RESET. 
edits can be NULL.340* @param errorCode Reference to an in/out error code value341* which must not indicate a failure before the function call.342* @return The length of the result string, if successful.343* When the result would be longer than destCapacity,344* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.345*346* @see ucasemap_utf8ToLower347* @stable ICU 59348*/349static int32_t utf8ToLower(350const char *locale, uint32_t options,351const char *src, int32_t srcLength,352char *dest, int32_t destCapacity, Edits *edits,353UErrorCode &errorCode);354355/**356* Uppercases a UTF-8 string and optionally records edits.357* Casing is locale-dependent and context-sensitive.358* The result may be longer or shorter than the original.359* The source string and the destination buffer must not overlap.360*361* @param locale The locale ID. ("" = root locale, NULL = default locale.)362* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.363* @param src The original string.364* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.365* @param dest A buffer for the result string. The result will be NUL-terminated if366* the buffer is large enough.367* The contents is undefined in case of failure.368* @param destCapacity The size of the buffer (number of bytes). If it is 0, then369* dest may be NULL and the function will only return the length of the result370* without writing any of the result string.371* @param edits Records edits for index mapping, working with styled text,372* and getting only changes (if any).373* The Edits contents is undefined if any error occurs.374* This function calls edits->reset() first unless375* options includes U_EDITS_NO_RESET. 
edits can be NULL.376* @param errorCode Reference to an in/out error code value377* which must not indicate a failure before the function call.378* @return The length of the result string, if successful.379* When the result would be longer than destCapacity,380* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.381*382* @see ucasemap_utf8ToUpper383* @stable ICU 59384*/385static int32_t utf8ToUpper(386const char *locale, uint32_t options,387const char *src, int32_t srcLength,388char *dest, int32_t destCapacity, Edits *edits,389UErrorCode &errorCode);390391#if !UCONFIG_NO_BREAK_ITERATION392393/**394* Titlecases a UTF-8 string and optionally records edits.395* Casing is locale-dependent and context-sensitive.396* The result may be longer or shorter than the original.397* The source string and the destination buffer must not overlap.398*399* Titlecasing uses a break iterator to find the first characters of words400* that are to be titlecased. It titlecases those characters and lowercases401* all others. (This can be modified with options bits.)402*403* @param locale The locale ID. ("" = root locale, NULL = default locale.)404* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,405* U_TITLECASE_NO_LOWERCASE,406* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,407* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.408* @param iter A break iterator to find the first characters of words that are to be titlecased.409* It is set to the source string (setUText())410* and used one or more times for iteration (first() and next()).411* If NULL, then a word break iterator for the locale is used412* (or something equivalent).413* @param src The original string.414* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.415* @param dest A buffer for the result string. 
The result will be NUL-terminated if416* the buffer is large enough.417* The contents is undefined in case of failure.418* @param destCapacity The size of the buffer (number of bytes). If it is 0, then419* dest may be NULL and the function will only return the length of the result420* without writing any of the result string.421* @param edits Records edits for index mapping, working with styled text,422* and getting only changes (if any).423* The Edits contents is undefined if any error occurs.424* This function calls edits->reset() first unless425* options includes U_EDITS_NO_RESET. edits can be NULL.426* @param errorCode Reference to an in/out error code value427* which must not indicate a failure before the function call.428* @return The length of the result string, if successful.429* When the result would be longer than destCapacity,430* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.431*432* @see ucasemap_utf8ToTitle433* @stable ICU 59434*/435static int32_t utf8ToTitle(436const char *locale, uint32_t options, BreakIterator *iter,437const char *src, int32_t srcLength,438char *dest, int32_t destCapacity, Edits *edits,439UErrorCode &errorCode);440441#endif // UCONFIG_NO_BREAK_ITERATION442443/**444* Case-folds a UTF-8 string and optionally records edits.445*446* Case folding is locale-independent and not context-sensitive,447* but there is an option for whether to include or exclude mappings for dotted I448* and dotless i that are marked with 'T' in CaseFolding.txt.449*450* The result may be longer or shorter than the original.451* The source string and the destination buffer must not overlap.452*453* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,454* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.455* @param src The original string.456* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.457* @param dest A buffer for the result string. 
The result will be NUL-terminated if458* the buffer is large enough.459* The contents is undefined in case of failure.460* @param destCapacity The size of the buffer (number of bytes). If it is 0, then461* dest may be NULL and the function will only return the length of the result462* without writing any of the result string.463* @param edits Records edits for index mapping, working with styled text,464* and getting only changes (if any).465* The Edits contents is undefined if any error occurs.466* This function calls edits->reset() first unless467* options includes U_EDITS_NO_RESET. edits can be NULL.468* @param errorCode Reference to an in/out error code value469* which must not indicate a failure before the function call.470* @return The length of the result string, if successful.471* When the result would be longer than destCapacity,472* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.473*474* @see ucasemap_utf8FoldCase475* @stable ICU 59476*/477static int32_t utf8Fold(478uint32_t options,479const char *src, int32_t srcLength,480char *dest, int32_t destCapacity, Edits *edits,481UErrorCode &errorCode);482483private:484CaseMap() = delete;485CaseMap(const CaseMap &other) = delete;486CaseMap &operator=(const CaseMap &other) = delete;487};488489U_NAMESPACE_END490491#endif // __CASEMAP_H__492493494