Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/caniter.h
38827 views
/*1*******************************************************************************2* Copyright (C) 1996-2014, International Business Machines Corporation and3* others. All Rights Reserved.4*******************************************************************************5*/67#ifndef CANITER_H8#define CANITER_H910#include "unicode/utypes.h"1112#if !UCONFIG_NO_NORMALIZATION1314#include "unicode/uobject.h"15#include "unicode/unistr.h"1617/**18* \file19* \brief C++ API: Canonical Iterator20*/2122/** Should permutation skip characters with combining class zero23* Should be either TRUE or FALSE. This is a compile time option24* @stable ICU 2.425*/26#ifndef CANITER_SKIP_ZEROES27#define CANITER_SKIP_ZEROES TRUE28#endif2930U_NAMESPACE_BEGIN3132class Hashtable;33class Normalizer2;34class Normalizer2Impl;3536/**37* This class allows one to iterate through all the strings that are canonically equivalent to a given38* string. For example, here are some sample results:39Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}401: \\u0041\\u030A\\u0064\\u0307\\u032741= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}422: \\u0041\\u030A\\u0064\\u0327\\u030743= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}443: \\u0041\\u030A\\u1E0B\\u032745= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}464: \\u0041\\u030A\\u1E11\\u030747= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}485: \\u00C5\\u0064\\u0307\\u032749= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}506: \\u00C5\\u0064\\u0327\\u030751= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}527: \\u00C5\\u1E0B\\u032753= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}548: \\u00C5\\u1E11\\u030755= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}569: \\u212B\\u0064\\u0307\\u032757= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}5810: \\u212B\\u0064\\u0327\\u030759= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}6011: \\u212B\\u1E0B\\u032761= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}6212: \\u212B\\u1E11\\u030763= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}64*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,65* since it has not been optimized for that situation.66* Note, CanonicalIterator is not intended to be subclassed.67* @author M. Davis68* @author C++ port by V. Weinstein69* @stable ICU 2.470*/71class U_COMMON_API CanonicalIterator U_FINAL : public UObject {72public:73/**74* Construct a CanonicalIterator object75* @param source string to get results for76* @param status Fill-in parameter which receives the status of this operation.77* @stable ICU 2.478*/79CanonicalIterator(const UnicodeString &source, UErrorCode &status);8081/** Destructor82* Cleans pieces83* @stable ICU 2.484*/85virtual ~CanonicalIterator();8687/**88* Gets the NFD form of the current source we are iterating over.89* @return gets the source: NOTE: it is the NFD form of source90* @stable ICU 2.491*/92UnicodeString getSource();9394/**95* Resets the iterator so that one can start again from the beginning.96* @stable ICU 2.497*/98void reset();99100/**101* Get the next canonically equivalent string.102* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>103* @return the next string that is canonically equivalent. A bogus string is returned when104* the iteration is done.105* @stable ICU 2.4106*/107UnicodeString next();108109/**110* Set a new source for this iterator. Allows object reuse.111* @param newSource the source string to iterate against. This allows the same iterator to be used112* while changing the source string, saving object creation.113* @param status Fill-in parameter which receives the status of this operation.114* @stable ICU 2.4115*/116void setSource(const UnicodeString &newSource, UErrorCode &status);117118#ifndef U_HIDE_INTERNAL_API119/**120* Dumb recursive implementation of permutation.121* TODO: optimize122* @param source the string to find permutations for123* @param skipZeros determine if skip zeros124* @param result the results in a set.125* @param status Fill-in parameter which receives the status of this operation.126* @internal127*/128static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);129#endif /* U_HIDE_INTERNAL_API */130131/**132* ICU "poor man's RTTI", returns a UClassID for this class.133*134* @stable ICU 2.2135*/136static UClassID U_EXPORT2 getStaticClassID();137138/**139* ICU "poor man's RTTI", returns a UClassID for the actual class.140*141* @stable ICU 2.2142*/143virtual UClassID getDynamicClassID() const;144145private:146// ===================== PRIVATES ==============================147// private default constructor148CanonicalIterator();149150151/**152* Copy constructor. Private for now.153* @internal154*/155CanonicalIterator(const CanonicalIterator& other);156157/**158* Assignment operator. Private for now.159* @internal160*/161CanonicalIterator& operator=(const CanonicalIterator& other);162163// fields164UnicodeString source;165UBool done;166167// 2 dimensional array holds the pieces of the string with168// their different canonically equivalent representations169UnicodeString **pieces;170int32_t pieces_length;171int32_t *pieces_lengths;172173// current is used in iterating to combine pieces174int32_t *current;175int32_t current_length;176177// transient fields178UnicodeString buffer;179180const Normalizer2 &nfd;181const Normalizer2Impl &nfcImpl;182183// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.184UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)185186//Set getEquivalents2(String segment);187Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status);188//Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);189190/**191* See if the decomposition of cp2 is at segment starting at segmentPos192* (with canonical rearrangment!)193* If so, take the remainder, and return the equivalents194*/195//Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);196Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);197//Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);198199void cleanPieces();200201};202203U_NAMESPACE_END204205#endif /* #if !UCONFIG_NO_NORMALIZATION */206207#endif208209210