// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/**3************************************************************************************4* Copyright (C) 2006-2012, International Business Machines Corporation and others. *5* All Rights Reserved. *6************************************************************************************7*/89#ifndef BRKENG_H10#define BRKENG_H1112#include "unicode/utypes.h"13#include "unicode/uobject.h"14#include "unicode/utext.h"15#include "unicode/uscript.h"1617U_NAMESPACE_BEGIN1819class UnicodeSet;20class UStack;21class UVector32;22class DictionaryMatcher;2324/*******************************************************************25* LanguageBreakEngine26*/2728/**29* <p>LanguageBreakEngines implement language-specific knowledge for30* finding text boundaries within a run of characters belonging to a31* specific set. The boundaries will be of a specific kind, e.g. word,32* line, etc.</p>33*34* <p>LanguageBreakEngines should normally be implemented so as to35* be shared between threads without locking.</p>36*/37class LanguageBreakEngine : public UMemory {38public:3940/**41* <p>Default constructor.</p>42*43*/44LanguageBreakEngine();4546/**47* <p>Virtual destructor.</p>48*/49virtual ~LanguageBreakEngine();5051/**52* <p>Indicate whether this engine handles a particular character for53* a particular kind of break.</p>54*55* @param c A character which begins a run that the engine might handle56* @return true if this engine handles the particular character and break57* type.58*/59virtual UBool handles(UChar32 c) const = 0;6061/**62* <p>Find any breaks within a run in the supplied text.</p>63*64* @param text A UText representing the text. The65* iterator is left at the end of the run of characters which the engine66* is capable of handling.67* @param startPos The start of the run within the supplied text.68* @param endPos The end of the run within the supplied text.69* @param foundBreaks A Vector of int32_t to receive the breaks.70* @param status Information on any errors encountered.71* @return The number of breaks found.72*/73virtual int32_t findBreaks( UText *text,74int32_t startPos,75int32_t endPos,76UVector32 &foundBreaks,77UBool isPhraseBreaking,78UErrorCode &status) const = 0;7980};8182/*******************************************************************83* LanguageBreakFactory84*/8586/**87* <p>LanguageBreakFactorys find and return a LanguageBreakEngine88* that can determine breaks for characters in a specific set, if89* such an object can be found.</p>90*91* <p>If a LanguageBreakFactory is to be shared between threads,92* appropriate synchronization must be used; there is none internal93* to the factory.</p>94*95* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can96* normally be shared between threads without synchronization, unless97* the specific subclass of LanguageBreakFactory indicates otherwise.</p>98*99* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine100* it returns when it itself is deleted, unless the specific subclass of101* LanguageBreakFactory indicates otherwise. Naturally, the factory should102* not be deleted until the LanguageBreakEngines it has returned are no103* longer needed.</p>104*/105class LanguageBreakFactory : public UMemory {106public:107108/**109* <p>Default constructor.</p>110*111*/112LanguageBreakFactory();113114/**115* <p>Virtual destructor.</p>116*/117virtual ~LanguageBreakFactory();118119/**120* <p>Find and return a LanguageBreakEngine that can find the desired121* kind of break for the set of characters to which the supplied122* character belongs. It is up to the set of available engines to123* determine what the sets of characters are.</p>124*125* @param c A character that begins a run for which a LanguageBreakEngine is126* sought.127* @return A LanguageBreakEngine with the desired characteristics, or 0.128*/129virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;130131};132133/*******************************************************************134* UnhandledEngine135*/136137/**138* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that139* handles characters that no other LanguageBreakEngine is available to140* handle. It is told the character and the type of break; at its141* discretion it may handle more than the specified character (e.g.,142* the entire script to which that character belongs.</p>143*144* <p>UnhandledEngines may not be shared between threads without145* external synchronization.</p>146*/147148class UnhandledEngine : public LanguageBreakEngine {149private:150151/**152* The sets of characters handled.153* @internal154*/155156UnicodeSet *fHandled;157158public:159160/**161* <p>Default constructor.</p>162*163*/164UnhandledEngine(UErrorCode &status);165166/**167* <p>Virtual destructor.</p>168*/169virtual ~UnhandledEngine();170171/**172* <p>Indicate whether this engine handles a particular character for173* a particular kind of break.</p>174*175* @param c A character which begins a run that the engine might handle176* @return true if this engine handles the particular character and break177* type.178*/179virtual UBool handles(UChar32 c) const override;180181/**182* <p>Find any breaks within a run in the supplied text.</p>183*184* @param text A UText representing the text (TODO: UText). The185* iterator is left at the end of the run of characters which the engine186* is capable of handling.187* @param startPos The start of the run within the supplied text.188* @param endPos The end of the run within the supplied text.189* @param foundBreaks An allocated C array of the breaks found, if any190* @param status Information on any errors encountered.191* @return The number of breaks found.192*/193virtual int32_t findBreaks( UText *text,194int32_t startPos,195int32_t endPos,196UVector32 &foundBreaks,197UBool isPhraseBreaking,198UErrorCode &status) const override;199200/**201* <p>Tell the engine to handle a particular character and break type.</p>202*203* @param c A character which the engine should handle204*/205virtual void handleCharacter(UChar32 c);206207};208209/*******************************************************************210* ICULanguageBreakFactory211*/212213/**214* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for215* ICU. It creates dictionary-based LanguageBreakEngines from dictionary216* data in the ICU data file.</p>217*/218class ICULanguageBreakFactory : public LanguageBreakFactory {219private:220221/**222* The stack of break engines created by this factory223* @internal224*/225226UStack *fEngines;227228public:229230/**231* <p>Standard constructor.</p>232*233*/234ICULanguageBreakFactory(UErrorCode &status);235236/**237* <p>Virtual destructor.</p>238*/239virtual ~ICULanguageBreakFactory();240241/**242* <p>Find and return a LanguageBreakEngine that can find the desired243* kind of break for the set of characters to which the supplied244* character belongs. It is up to the set of available engines to245* determine what the sets of characters are.</p>246*247* @param c A character that begins a run for which a LanguageBreakEngine is248* sought.249* @return A LanguageBreakEngine with the desired characteristics, or 0.250*/251virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;252253protected:254/**255* <p>Create a LanguageBreakEngine for the set of characters to which256* the supplied character belongs, for the specified break type.</p>257*258* @param c A character that begins a run for which a LanguageBreakEngine is259* sought.260* @return A LanguageBreakEngine with the desired characteristics, or 0.261*/262virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);263264/**265* <p>Create a DictionaryMatcher for the specified script and break type.</p>266* @param script An ISO 15924 script code that identifies the dictionary to be267* created.268* @return A DictionaryMatcher with the desired characteristics, or NULL.269*/270virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);271};272273U_NAMESPACE_END274275/* BRKENG_H */276#endif277278279