Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/icu4c/common/brkeng.h
9906 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/**
4
************************************************************************************
5
* Copyright (C) 2006-2012, International Business Machines Corporation and others. *
6
* All Rights Reserved. *
7
************************************************************************************
8
*/
9
10
#ifndef BRKENG_H
11
#define BRKENG_H
12
13
#include "unicode/umisc.h"
14
#include "unicode/utypes.h"
15
#include "unicode/uobject.h"
16
#include "unicode/utext.h"
17
#include "unicode/uscript.h"
18
19
U_NAMESPACE_BEGIN
20
21
// Forward declarations: this header only names these types through pointers,
// references, or as a template argument, so their full definitions are not
// required here.
class UnicodeSet;
class UStack;
class UVector32;
class DictionaryMatcher;
class ExternalBreakEngine;
26
27
/*******************************************************************
28
* LanguageBreakEngine
29
*/
30
31
/**
32
* <p>LanguageBreakEngines implement language-specific knowledge for
33
* finding text boundaries within a run of characters belonging to a
34
* specific set. The boundaries will be of a specific kind, e.g. word,
35
* line, etc.</p>
36
*
37
* <p>LanguageBreakEngines should normally be implemented so as to
38
* be shared between threads without locking.</p>
39
*/
40
class LanguageBreakEngine : public UObject {
41
public:
42
43
/**
44
* <p>Default constructor.</p>
45
*
46
*/
47
LanguageBreakEngine();
48
49
/**
50
* <p>Virtual destructor.</p>
51
*/
52
virtual ~LanguageBreakEngine();
53
54
/**
55
* <p>Indicate whether this engine handles a particular character for
56
* a particular kind of break.</p>
57
*
58
* @param c A character which begins a run that the engine might handle
59
* @param locale The locale.
60
* @return true if this engine handles the particular character and break
61
* type.
62
*/
63
virtual UBool handles(UChar32 c, const char* locale) const = 0;
64
65
/**
66
* <p>Find any breaks within a run in the supplied text.</p>
67
*
68
* @param text A UText representing the text. The
69
* iterator is left at the end of the run of characters which the engine
70
* is capable of handling.
71
* @param startPos The start of the run within the supplied text.
72
* @param endPos The end of the run within the supplied text.
73
* @param foundBreaks A Vector of int32_t to receive the breaks.
74
* @param status Information on any errors encountered.
75
* @return The number of breaks found.
76
*/
77
virtual int32_t findBreaks( UText *text,
78
int32_t startPos,
79
int32_t endPos,
80
UVector32 &foundBreaks,
81
UBool isPhraseBreaking,
82
UErrorCode &status) const = 0;
83
84
};
85
86
/*******************************************************************
87
* BreakEngineWrapper
88
*/
89
90
/**
 * <p>BreakEngineWrapper implements LanguageBreakEngine as a thin wrapper
 * that delegates the task to an ExternalBreakEngine.</p>
 */
95
class BreakEngineWrapper : public LanguageBreakEngine {
96
public:
97
98
BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status);
99
100
virtual ~BreakEngineWrapper();
101
102
virtual UBool handles(UChar32 c, const char* locale) const override;
103
104
virtual int32_t findBreaks( UText *text,
105
int32_t startPos,
106
int32_t endPos,
107
UVector32 &foundBreaks,
108
UBool isPhraseBreaking,
109
UErrorCode &status) const override;
110
111
private:
112
LocalPointer<ExternalBreakEngine> delegate;
113
};
114
115
/*******************************************************************
116
* LanguageBreakFactory
117
*/
118
119
/**
120
* <p>LanguageBreakFactorys find and return a LanguageBreakEngine
121
* that can determine breaks for characters in a specific set, if
122
* such an object can be found.</p>
123
*
124
* <p>If a LanguageBreakFactory is to be shared between threads,
125
* appropriate synchronization must be used; there is none internal
126
* to the factory.</p>
127
*
128
* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
129
* normally be shared between threads without synchronization, unless
130
* the specific subclass of LanguageBreakFactory indicates otherwise.</p>
131
*
132
* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
133
* it returns when it itself is deleted, unless the specific subclass of
134
* LanguageBreakFactory indicates otherwise. Naturally, the factory should
135
* not be deleted until the LanguageBreakEngines it has returned are no
136
* longer needed.</p>
137
*/
138
class LanguageBreakFactory : public UMemory {
139
public:
140
141
/**
142
* <p>Default constructor.</p>
143
*
144
*/
145
LanguageBreakFactory();
146
147
/**
148
* <p>Virtual destructor.</p>
149
*/
150
virtual ~LanguageBreakFactory();
151
152
/**
153
* <p>Find and return a LanguageBreakEngine that can find the desired
154
* kind of break for the set of characters to which the supplied
155
* character belongs. It is up to the set of available engines to
156
* determine what the sets of characters are.</p>
157
*
158
* @param c A character that begins a run for which a LanguageBreakEngine is
159
* sought.
160
* @param locale The locale.
161
* @return A LanguageBreakEngine with the desired characteristics, or 0.
162
*/
163
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0;
164
165
};
166
167
/*******************************************************************
168
* UnhandledEngine
169
*/
170
171
/**
 * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
 * handles characters that no other LanguageBreakEngine is available to
 * handle. It is told the character and the type of break; at its
 * discretion it may handle more than the specified character (e.g.,
 * the entire script to which that character belongs).</p>
 *
 * <p>UnhandledEngines may not be shared between threads without
 * external synchronization.</p>
 */
181
182
class UnhandledEngine : public LanguageBreakEngine {
183
private:
184
185
/**
186
* The sets of characters handled.
187
* @internal
188
*/
189
190
UnicodeSet *fHandled;
191
192
public:
193
194
/**
195
* <p>Default constructor.</p>
196
*
197
*/
198
UnhandledEngine(UErrorCode &status);
199
200
/**
201
* <p>Virtual destructor.</p>
202
*/
203
virtual ~UnhandledEngine();
204
205
/**
206
* <p>Indicate whether this engine handles a particular character for
207
* a particular kind of break.</p>
208
*
209
* @param c A character which begins a run that the engine might handle
210
* @param locale The locale.
211
* @return true if this engine handles the particular character and break
212
* type.
213
*/
214
virtual UBool handles(UChar32 c, const char* locale) const override;
215
216
/**
217
* <p>Find any breaks within a run in the supplied text.</p>
218
*
219
* @param text A UText representing the text (TODO: UText). The
220
* iterator is left at the end of the run of characters which the engine
221
* is capable of handling.
222
* @param startPos The start of the run within the supplied text.
223
* @param endPos The end of the run within the supplied text.
224
* @param foundBreaks An allocated C array of the breaks found, if any
225
* @param status Information on any errors encountered.
226
* @return The number of breaks found.
227
*/
228
virtual int32_t findBreaks( UText *text,
229
int32_t startPos,
230
int32_t endPos,
231
UVector32 &foundBreaks,
232
UBool isPhraseBreaking,
233
UErrorCode &status) const override;
234
235
/**
236
* <p>Tell the engine to handle a particular character and break type.</p>
237
*
238
* @param c A character which the engine should handle
239
*/
240
virtual void handleCharacter(UChar32 c);
241
242
};
243
244
/*******************************************************************
245
* ICULanguageBreakFactory
246
*/
247
248
/**
249
* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
250
* ICU. It creates dictionary-based LanguageBreakEngines from dictionary
251
* data in the ICU data file.</p>
252
*/
253
class ICULanguageBreakFactory : public LanguageBreakFactory {
254
private:
255
256
/**
257
* The stack of break engines created by this factory
258
* @internal
259
*/
260
261
UStack *fEngines;
262
263
public:
264
265
/**
266
* <p>Standard constructor.</p>
267
*
268
*/
269
ICULanguageBreakFactory(UErrorCode &status);
270
271
/**
272
* <p>Virtual destructor.</p>
273
*/
274
virtual ~ICULanguageBreakFactory();
275
276
/**
277
* <p>Find and return a LanguageBreakEngine that can find the desired
278
* kind of break for the set of characters to which the supplied
279
* character belongs. It is up to the set of available engines to
280
* determine what the sets of characters are.</p>
281
*
282
* @param c A character that begins a run for which a LanguageBreakEngine is
283
* sought.
284
* @param locale The locale.
285
* @return A LanguageBreakEngine with the desired characteristics, or 0.
286
*/
287
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override;
288
289
/**
290
* Add and adopt the engine and return an URegistryKey.
291
* @param engine The ExternalBreakEngine to be added and adopt. The caller
292
* pass the ownership and should not release the memory after this.
293
* @param status the error code.
294
*/
295
virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status);
296
297
protected:
298
/**
299
* <p>Create a LanguageBreakEngine for the set of characters to which
300
* the supplied character belongs, for the specified break type.</p>
301
*
302
* @param c A character that begins a run for which a LanguageBreakEngine is
303
* sought.
304
* @param locale The locale.
305
* @return A LanguageBreakEngine with the desired characteristics, or 0.
306
*/
307
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale);
308
309
/**
310
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
311
* @param script An ISO 15924 script code that identifies the dictionary to be
312
* created.
313
* @return A DictionaryMatcher with the desired characteristics, or nullptr.
314
*/
315
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
316
317
private:
318
void ensureEngines(UErrorCode& status);
319
};
320
321
U_NAMESPACE_END
322
323
/* BRKENG_H */
324
#endif
325
326