// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/**3************************************************************************************4* Copyright (C) 2006-2012, International Business Machines Corporation and others. *5* All Rights Reserved. *6************************************************************************************7*/89#ifndef BRKENG_H10#define BRKENG_H1112#include "unicode/umisc.h"13#include "unicode/utypes.h"14#include "unicode/uobject.h"15#include "unicode/utext.h"16#include "unicode/uscript.h"1718U_NAMESPACE_BEGIN1920class UnicodeSet;21class UStack;22class UVector32;23class DictionaryMatcher;24class ExternalBreakEngine;2526/*******************************************************************27* LanguageBreakEngine28*/2930/**31* <p>LanguageBreakEngines implement language-specific knowledge for32* finding text boundaries within a run of characters belonging to a33* specific set. The boundaries will be of a specific kind, e.g. word,34* line, etc.</p>35*36* <p>LanguageBreakEngines should normally be implemented so as to37* be shared between threads without locking.</p>38*/39class LanguageBreakEngine : public UObject {40public:4142/**43* <p>Default constructor.</p>44*45*/46LanguageBreakEngine();4748/**49* <p>Virtual destructor.</p>50*/51virtual ~LanguageBreakEngine();5253/**54* <p>Indicate whether this engine handles a particular character for55* a particular kind of break.</p>56*57* @param c A character which begins a run that the engine might handle58* @param locale The locale.59* @return true if this engine handles the particular character and break60* type.61*/62virtual UBool handles(UChar32 c, const char* locale) const = 0;6364/**65* <p>Find any breaks within a run in the supplied text.</p>66*67* @param text A UText representing the text. The68* iterator is left at the end of the run of characters which the engine69* is capable of handling.70* @param startPos The start of the run within the supplied text.71* @param endPos The end of the run within the supplied text.72* @param foundBreaks A Vector of int32_t to receive the breaks.73* @param status Information on any errors encountered.74* @return The number of breaks found.75*/76virtual int32_t findBreaks( UText *text,77int32_t startPos,78int32_t endPos,79UVector32 &foundBreaks,80UBool isPhraseBreaking,81UErrorCode &status) const = 0;8283};8485/*******************************************************************86* BreakEngineWrapper87*/8889/**90* <p>BreakEngineWrapper implement LanguageBreakEngine by91* a thin wrapper that delegate the task to ExternalBreakEngine92* </p>93*/94class BreakEngineWrapper : public LanguageBreakEngine {95public:9697BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status);9899virtual ~BreakEngineWrapper();100101virtual UBool handles(UChar32 c, const char* locale) const override;102103virtual int32_t findBreaks( UText *text,104int32_t startPos,105int32_t endPos,106UVector32 &foundBreaks,107UBool isPhraseBreaking,108UErrorCode &status) const override;109110private:111LocalPointer<ExternalBreakEngine> delegate;112};113114/*******************************************************************115* LanguageBreakFactory116*/117118/**119* <p>LanguageBreakFactorys find and return a LanguageBreakEngine120* that can determine breaks for characters in a specific set, if121* such an object can be found.</p>122*123* <p>If a LanguageBreakFactory is to be shared between threads,124* appropriate synchronization must be used; there is none internal125* to the factory.</p>126*127* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can128* normally be shared between threads without synchronization, unless129* the specific subclass of LanguageBreakFactory indicates otherwise.</p>130*131* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine132* it returns when it itself is deleted, unless the specific subclass of133* LanguageBreakFactory indicates otherwise. Naturally, the factory should134* not be deleted until the LanguageBreakEngines it has returned are no135* longer needed.</p>136*/137class LanguageBreakFactory : public UMemory {138public:139140/**141* <p>Default constructor.</p>142*143*/144LanguageBreakFactory();145146/**147* <p>Virtual destructor.</p>148*/149virtual ~LanguageBreakFactory();150151/**152* <p>Find and return a LanguageBreakEngine that can find the desired153* kind of break for the set of characters to which the supplied154* character belongs. It is up to the set of available engines to155* determine what the sets of characters are.</p>156*157* @param c A character that begins a run for which a LanguageBreakEngine is158* sought.159* @param locale The locale.160* @return A LanguageBreakEngine with the desired characteristics, or 0.161*/162virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0;163164};165166/*******************************************************************167* UnhandledEngine168*/169170/**171* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that172* handles characters that no other LanguageBreakEngine is available to173* handle. It is told the character and the type of break; at its174* discretion it may handle more than the specified character (e.g.,175* the entire script to which that character belongs.</p>176*177* <p>UnhandledEngines may not be shared between threads without178* external synchronization.</p>179*/180181class UnhandledEngine : public LanguageBreakEngine {182private:183184/**185* The sets of characters handled.186* @internal187*/188189UnicodeSet *fHandled;190191public:192193/**194* <p>Default constructor.</p>195*196*/197UnhandledEngine(UErrorCode &status);198199/**200* <p>Virtual destructor.</p>201*/202virtual ~UnhandledEngine();203204/**205* <p>Indicate whether this engine handles a particular character for206* a particular kind of break.</p>207*208* @param c A character which begins a run that the engine might handle209* @param locale The locale.210* @return true if this engine handles the particular character and break211* type.212*/213virtual UBool handles(UChar32 c, const char* locale) const override;214215/**216* <p>Find any breaks within a run in the supplied text.</p>217*218* @param text A UText representing the text (TODO: UText). The219* iterator is left at the end of the run of characters which the engine220* is capable of handling.221* @param startPos The start of the run within the supplied text.222* @param endPos The end of the run within the supplied text.223* @param foundBreaks An allocated C array of the breaks found, if any224* @param status Information on any errors encountered.225* @return The number of breaks found.226*/227virtual int32_t findBreaks( UText *text,228int32_t startPos,229int32_t endPos,230UVector32 &foundBreaks,231UBool isPhraseBreaking,232UErrorCode &status) const override;233234/**235* <p>Tell the engine to handle a particular character and break type.</p>236*237* @param c A character which the engine should handle238*/239virtual void handleCharacter(UChar32 c);240241};242243/*******************************************************************244* ICULanguageBreakFactory245*/246247/**248* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for249* ICU. It creates dictionary-based LanguageBreakEngines from dictionary250* data in the ICU data file.</p>251*/252class ICULanguageBreakFactory : public LanguageBreakFactory {253private:254255/**256* The stack of break engines created by this factory257* @internal258*/259260UStack *fEngines;261262public:263264/**265* <p>Standard constructor.</p>266*267*/268ICULanguageBreakFactory(UErrorCode &status);269270/**271* <p>Virtual destructor.</p>272*/273virtual ~ICULanguageBreakFactory();274275/**276* <p>Find and return a LanguageBreakEngine that can find the desired277* kind of break for the set of characters to which the supplied278* character belongs. It is up to the set of available engines to279* determine what the sets of characters are.</p>280*281* @param c A character that begins a run for which a LanguageBreakEngine is282* sought.283* @param locale The locale.284* @return A LanguageBreakEngine with the desired characteristics, or 0.285*/286virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override;287288/**289* Add and adopt the engine and return an URegistryKey.290* @param engine The ExternalBreakEngine to be added and adopt. The caller291* pass the ownership and should not release the memory after this.292* @param status the error code.293*/294virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status);295296protected:297/**298* <p>Create a LanguageBreakEngine for the set of characters to which299* the supplied character belongs, for the specified break type.</p>300*301* @param c A character that begins a run for which a LanguageBreakEngine is302* sought.303* @param locale The locale.304* @return A LanguageBreakEngine with the desired characteristics, or 0.305*/306virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale);307308/**309* <p>Create a DictionaryMatcher for the specified script and break type.</p>310* @param script An ISO 15924 script code that identifies the dictionary to be311* created.312* @return A DictionaryMatcher with the desired characteristics, or nullptr.313*/314virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);315316private:317void ensureEngines(UErrorCode& status);318};319320U_NAMESPACE_END321322/* BRKENG_H */323#endif324325326