Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/icucommon/brkeng.h
12343 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/**
4
************************************************************************************
5
* Copyright (C) 2006-2012, International Business Machines Corporation and others. *
6
* All Rights Reserved. *
7
************************************************************************************
8
*/
9
10
#ifndef BRKENG_H
11
#define BRKENG_H
12
13
#include "unicode/utypes.h"
14
#include "unicode/uobject.h"
15
#include "unicode/utext.h"
16
#include "unicode/uscript.h"
17
18
U_NAMESPACE_BEGIN
19
20
// Forward declarations of types that this header only uses through
// pointers or references, so their full definitions are not needed here.
class UnicodeSet;
class UStack;
class UVector32;
class DictionaryMatcher;
24
25
/*******************************************************************
26
* LanguageBreakEngine
27
*/
28
29
/**
30
* <p>LanguageBreakEngines implement language-specific knowledge for
31
* finding text boundaries within a run of characters belonging to a
32
* specific set. The boundaries will be of a specific kind, e.g. word,
33
* line, etc.</p>
34
*
35
* <p>LanguageBreakEngines should normally be implemented so as to
36
* be shared between threads without locking.</p>
37
*/
38
class LanguageBreakEngine : public UMemory {
39
public:
40
41
/**
42
* <p>Default constructor.</p>
43
*
44
*/
45
LanguageBreakEngine();
46
47
/**
48
* <p>Virtual destructor.</p>
49
*/
50
virtual ~LanguageBreakEngine();
51
52
/**
53
* <p>Indicate whether this engine handles a particular character for
54
* a particular kind of break.</p>
55
*
56
* @param c A character which begins a run that the engine might handle
57
* @return true if this engine handles the particular character and break
58
* type.
59
*/
60
virtual UBool handles(UChar32 c) const = 0;
61
62
/**
63
* <p>Find any breaks within a run in the supplied text.</p>
64
*
65
* @param text A UText representing the text. The
66
* iterator is left at the end of the run of characters which the engine
67
* is capable of handling.
68
* @param startPos The start of the run within the supplied text.
69
* @param endPos The end of the run within the supplied text.
70
* @param foundBreaks A Vector of int32_t to receive the breaks.
71
* @param status Information on any errors encountered.
72
* @return The number of breaks found.
73
*/
74
virtual int32_t findBreaks( UText *text,
75
int32_t startPos,
76
int32_t endPos,
77
UVector32 &foundBreaks,
78
UBool isPhraseBreaking,
79
UErrorCode &status) const = 0;
80
81
};
82
83
/*******************************************************************
84
* LanguageBreakFactory
85
*/
86
87
/**
88
* <p>LanguageBreakFactorys find and return a LanguageBreakEngine
89
* that can determine breaks for characters in a specific set, if
90
* such an object can be found.</p>
91
*
92
* <p>If a LanguageBreakFactory is to be shared between threads,
93
* appropriate synchronization must be used; there is none internal
94
* to the factory.</p>
95
*
96
* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
97
* normally be shared between threads without synchronization, unless
98
* the specific subclass of LanguageBreakFactory indicates otherwise.</p>
99
*
100
* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
101
* it returns when it itself is deleted, unless the specific subclass of
102
* LanguageBreakFactory indicates otherwise. Naturally, the factory should
103
* not be deleted until the LanguageBreakEngines it has returned are no
104
* longer needed.</p>
105
*/
106
class LanguageBreakFactory : public UMemory {
107
public:
108
109
/**
110
* <p>Default constructor.</p>
111
*
112
*/
113
LanguageBreakFactory();
114
115
/**
116
* <p>Virtual destructor.</p>
117
*/
118
virtual ~LanguageBreakFactory();
119
120
/**
121
* <p>Find and return a LanguageBreakEngine that can find the desired
122
* kind of break for the set of characters to which the supplied
123
* character belongs. It is up to the set of available engines to
124
* determine what the sets of characters are.</p>
125
*
126
* @param c A character that begins a run for which a LanguageBreakEngine is
127
* sought.
128
* @return A LanguageBreakEngine with the desired characteristics, or 0.
129
*/
130
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
131
132
};
133
134
/*******************************************************************
135
* UnhandledEngine
136
*/
137
138
/**
139
* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
140
* handles characters that no other LanguageBreakEngine is available to
141
* handle. It is told the character and the type of break; at its
142
* discretion it may handle more than the specified character (e.g.,
143
* the entire script to which that character belongs.</p>
144
*
145
* <p>UnhandledEngines may not be shared between threads without
146
* external synchronization.</p>
147
*/
148
149
class UnhandledEngine : public LanguageBreakEngine {
150
private:
151
152
/**
153
* The sets of characters handled.
154
* @internal
155
*/
156
157
UnicodeSet *fHandled;
158
159
public:
160
161
/**
162
* <p>Default constructor.</p>
163
*
164
*/
165
UnhandledEngine(UErrorCode &status);
166
167
/**
168
* <p>Virtual destructor.</p>
169
*/
170
virtual ~UnhandledEngine();
171
172
/**
173
* <p>Indicate whether this engine handles a particular character for
174
* a particular kind of break.</p>
175
*
176
* @param c A character which begins a run that the engine might handle
177
* @return true if this engine handles the particular character and break
178
* type.
179
*/
180
virtual UBool handles(UChar32 c) const override;
181
182
/**
183
* <p>Find any breaks within a run in the supplied text.</p>
184
*
185
* @param text A UText representing the text (TODO: UText). The
186
* iterator is left at the end of the run of characters which the engine
187
* is capable of handling.
188
* @param startPos The start of the run within the supplied text.
189
* @param endPos The end of the run within the supplied text.
190
* @param foundBreaks An allocated C array of the breaks found, if any
191
* @param status Information on any errors encountered.
192
* @return The number of breaks found.
193
*/
194
virtual int32_t findBreaks( UText *text,
195
int32_t startPos,
196
int32_t endPos,
197
UVector32 &foundBreaks,
198
UBool isPhraseBreaking,
199
UErrorCode &status) const override;
200
201
/**
202
* <p>Tell the engine to handle a particular character and break type.</p>
203
*
204
* @param c A character which the engine should handle
205
*/
206
virtual void handleCharacter(UChar32 c);
207
208
};
209
210
/*******************************************************************
211
* ICULanguageBreakFactory
212
*/
213
214
/**
215
* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
216
* ICU. It creates dictionary-based LanguageBreakEngines from dictionary
217
* data in the ICU data file.</p>
218
*/
219
class ICULanguageBreakFactory : public LanguageBreakFactory {
220
private:
221
222
/**
223
* The stack of break engines created by this factory
224
* @internal
225
*/
226
227
UStack *fEngines;
228
229
public:
230
231
/**
232
* <p>Standard constructor.</p>
233
*
234
*/
235
ICULanguageBreakFactory(UErrorCode &status);
236
237
/**
238
* <p>Virtual destructor.</p>
239
*/
240
virtual ~ICULanguageBreakFactory();
241
242
/**
243
* <p>Find and return a LanguageBreakEngine that can find the desired
244
* kind of break for the set of characters to which the supplied
245
* character belongs. It is up to the set of available engines to
246
* determine what the sets of characters are.</p>
247
*
248
* @param c A character that begins a run for which a LanguageBreakEngine is
249
* sought.
250
* @return A LanguageBreakEngine with the desired characteristics, or 0.
251
*/
252
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;
253
254
protected:
255
/**
256
* <p>Create a LanguageBreakEngine for the set of characters to which
257
* the supplied character belongs, for the specified break type.</p>
258
*
259
* @param c A character that begins a run for which a LanguageBreakEngine is
260
* sought.
261
* @return A LanguageBreakEngine with the desired characteristics, or 0.
262
*/
263
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
264
265
/**
266
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
267
* @param script An ISO 15924 script code that identifies the dictionary to be
268
* created.
269
* @return A DictionaryMatcher with the desired characteristics, or NULL.
270
*/
271
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
272
};
273
274
U_NAMESPACE_END
275
276
/* BRKENG_H */
277
#endif
278
279