// Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/caniter.h
// 48729 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3*******************************************************************************4* Copyright (C) 1996-2014, International Business Machines Corporation and5* others. All Rights Reserved.6*******************************************************************************7*/89#ifndef CANITER_H10#define CANITER_H1112#include "unicode/utypes.h"1314#if !UCONFIG_NO_NORMALIZATION1516#include "unicode/uobject.h"17#include "unicode/unistr.h"1819/**20* \file21* \brief C++ API: Canonical Iterator22*/2324/** Should permutation skip characters with combining class zero25* Should be either TRUE or FALSE. This is a compile time option26* @stable ICU 2.427*/28#ifndef CANITER_SKIP_ZEROES29#define CANITER_SKIP_ZEROES TRUE30#endif3132U_NAMESPACE_BEGIN3334class Hashtable;35class Normalizer2;36class Normalizer2Impl;3738/**39* This class allows one to iterate through all the strings that are canonically equivalent to a given40* string. 
For example, here are some sample results:41Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}421: \\u0041\\u030A\\u0064\\u0307\\u032743= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}442: \\u0041\\u030A\\u0064\\u0327\\u030745= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}463: \\u0041\\u030A\\u1E0B\\u032747= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}484: \\u0041\\u030A\\u1E11\\u030749= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}505: \\u00C5\\u0064\\u0307\\u032751= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}526: \\u00C5\\u0064\\u0327\\u030753= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}547: \\u00C5\\u1E0B\\u032755= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}568: \\u00C5\\u1E11\\u030757= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}589: \\u212B\\u0064\\u0307\\u032759= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}6010: \\u212B\\u0064\\u0327\\u030761= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}6211: \\u212B\\u1E0B\\u032763= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}6412: \\u212B\\u1E11\\u030765= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}66*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,67* since it has not been optimized for that situation.68* Note, CanonicalIterator is not intended to be subclassed.69* @author M. Davis70* @author C++ port by V. 
Weinstein71* @stable ICU 2.472*/73class U_COMMON_API CanonicalIterator U_FINAL : public UObject {74public:75/**76* Construct a CanonicalIterator object77* @param source string to get results for78* @param status Fill-in parameter which receives the status of this operation.79* @stable ICU 2.480*/81CanonicalIterator(const UnicodeString &source, UErrorCode &status);8283/** Destructor84* Cleans pieces85* @stable ICU 2.486*/87virtual ~CanonicalIterator();8889/**90* Gets the NFD form of the current source we are iterating over.91* @return gets the source: NOTE: it is the NFD form of source92* @stable ICU 2.493*/94UnicodeString getSource();9596/**97* Resets the iterator so that one can start again from the beginning.98* @stable ICU 2.499*/100void reset();101102/**103* Get the next canonically equivalent string.104* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>105* @return the next string that is canonically equivalent. A bogus string is returned when106* the iteration is done.107* @stable ICU 2.4108*/109UnicodeString next();110111/**112* Set a new source for this iterator. Allows object reuse.113* @param newSource the source string to iterate against. 
This allows the same iterator to be used114* while changing the source string, saving object creation.115* @param status Fill-in parameter which receives the status of this operation.116* @stable ICU 2.4117*/118void setSource(const UnicodeString &newSource, UErrorCode &status);119120#ifndef U_HIDE_INTERNAL_API121/**122* Dumb recursive implementation of permutation.123* TODO: optimize124* @param source the string to find permutations for125* @param skipZeros determine if skip zeros126* @param result the results in a set.127* @param status Fill-in parameter which receives the status of this operation.128* @internal129*/130static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);131#endif /* U_HIDE_INTERNAL_API */132133/**134* ICU "poor man's RTTI", returns a UClassID for this class.135*136* @stable ICU 2.2137*/138static UClassID U_EXPORT2 getStaticClassID();139140/**141* ICU "poor man's RTTI", returns a UClassID for the actual class.142*143* @stable ICU 2.2144*/145virtual UClassID getDynamicClassID() const;146147private:148// ===================== PRIVATES ==============================149// private default constructor150CanonicalIterator();151152153/**154* Copy constructor. Private for now.155* @internal (private)156*/157CanonicalIterator(const CanonicalIterator& other);158159/**160* Assignment operator. Private for now.161* @internal (private)162*/163CanonicalIterator& operator=(const CanonicalIterator& other);164165// fields166UnicodeString source;167UBool done;168169// 2 dimensional array holds the pieces of the string with170// their different canonically equivalent representations171UnicodeString **pieces;172int32_t pieces_length;173int32_t *pieces_lengths;174175// current is used in iterating to combine pieces176int32_t *current;177int32_t current_length;178179// transient fields180UnicodeString buffer;181182const Normalizer2 &nfd;183const Normalizer2Impl &nfcImpl;184185// we have a segment, in NFD. 
Find all the strings that are canonically equivalent to it.186UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)187188//Set getEquivalents2(String segment);189Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);190//Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);191192/**193* See if the decomposition of cp2 is at segment starting at segmentPos194* (with canonical rearrangment!)195* If so, take the remainder, and return the equivalents196*/197//Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);198Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);199//Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);200201void cleanPieces();202203};204205U_NAMESPACE_END206207#endif /* #if !UCONFIG_NO_NORMALIZATION */208209#endif210211212