Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/icu4c/common/brkiter.cpp
9903 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1997-2015, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
*******************************************************************************
8
*
9
* File brkiter.cpp
10
*
11
* Modification History:
12
*
13
* Date Name Description
14
* 02/18/97 aliu Converted from OpenClass. Added DONE.
15
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
16
*****************************************************************************************
17
*/
18
19
// *****************************************************************************
20
// This file was generated from the java source file BreakIterator.java
21
// *****************************************************************************
22
23
#include "unicode/utypes.h"
24
25
#if !UCONFIG_NO_BREAK_ITERATION
26
27
#include "unicode/rbbi.h"
28
#include "unicode/brkiter.h"
29
#include "unicode/udata.h"
30
#include "unicode/uloc.h"
31
#include "unicode/ures.h"
32
#include "unicode/ustring.h"
33
#include "unicode/filteredbrk.h"
34
#include "bytesinkutil.h"
35
#include "ucln_cmn.h"
36
#include "cstring.h"
37
#include "umutex.h"
38
#include "servloc.h"
39
#include "locbased.h"
40
#include "uresimp.h"
41
#include "uassert.h"
42
#include "ubrkimpl.h"
43
#include "utracimp.h"
44
#include "charstr.h"
45
46
// *****************************************************************************
47
// class BreakIterator
48
// This class implements methods for finding the location of boundaries in text.
49
// Instances of BreakIterator maintain a current position and scan over text
50
// returning the index of characters where boundaries occur.
51
// *****************************************************************************
52
53
U_NAMESPACE_BEGIN
54
55
// -------------------------------------
56
57
/**
 * Builds a RuleBasedBreakIterator for a locale from the break-iterator
 * resource data.
 *
 * The locale's bundle is opened, the entry named by `type` is looked up inside
 * its "boundaries" table, and the resulting string is split at the first '.'
 * into a data item name and an extension, which are then handed to
 * udata_open().
 *
 * @param loc    Locale whose boundary data is requested.
 * @param type   Key inside the "boundaries" table (makeInstance() passes
 *               values such as "word", "grapheme" or "line_strict").
 * @param status In/out error code. Nothing is done if it already indicates a
 *               failure on entry.
 * @return A newly allocated iterator, or nullptr if anything failed.
 */
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};   // zero-filled: stays NUL-terminated while at most 3 chars are copied in
    CharString actual;
    // Initialized so the bounds check below never reads an indeterminate value
    // when the lookups fail before ures_getString() writes the length.
    int32_t size = 0;
    const char16_t* brkfname = nullptr;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName = &brkNameStack;
    RuleBasedBreakIterator *result = nullptr;

    if (U_FAILURE(status))
        return nullptr;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT(static_cast<size_t>(size) < sizeof(fnbuff));
        if (static_cast<size_t>(size) >= sizeof(fnbuff)) {
            // The name would not fit into fnbuff; report it unless an earlier
            // error is already recorded.
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            actual.append(ures_getLocaleInternal(brkName, &status), -1, status);

            char16_t* extStart=u_strchr(brkfname, 0x002e);   // first '.'
            int len = 0;
            if (extStart != nullptr){
                const int32_t extLen = u_strlen(extStart + 1);
                if (static_cast<size_t>(extLen) < sizeof(ext)) {
                    len = static_cast<int>(extStart - brkfname);
                    // Copy only the characters that are really there; the
                    // untouched tail of ext supplies the terminating NUL.
                    u_UCharsToChars(extStart+1, ext, extLen);
                    u_UCharsToChars(brkfname, fnbuff, len);
                } else {
                    // An extension longer than ext can hold would leave ext
                    // unterminated; fail instead of passing it on.
                    status = U_BUFFER_OVERFLOW_ERROR;
                }
            }
            fnbuff[len]=0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return nullptr;
    }

    // Create a RuleBasedBreakIterator; the flag is true when the type name
    // contains "phrase".
    result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != nullptr, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != nullptr) {
        U_LOCALE_BASED(locBased, *static_cast<BreakIterator*>(result));

        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
                              actual.data(), status);
        LocaleBased::setLocaleID(loc.getName(), result->requestLocale, status);
    }

    ures_close(b);

    if (U_FAILURE(status) && result != nullptr) {  // Sometimes redundant check, but simple
        delete result;
        return nullptr;
    }

    if (result == nullptr) {
        // No iterator object was created, so the data file is closed here.
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
147
148
// Creates a break iterator for word breaks.
149
BreakIterator* U_EXPORT2
150
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
151
{
152
return createInstance(key, UBRK_WORD, status);
153
}
154
155
// -------------------------------------
156
157
// Creates a break iterator for line breaks.
158
BreakIterator* U_EXPORT2
159
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
160
{
161
return createInstance(key, UBRK_LINE, status);
162
}
163
164
// -------------------------------------
165
166
// Creates a break iterator for character breaks.
167
BreakIterator* U_EXPORT2
168
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
169
{
170
return createInstance(key, UBRK_CHARACTER, status);
171
}
172
173
// -------------------------------------
174
175
// Creates a break iterator for sentence breaks.
176
BreakIterator* U_EXPORT2
177
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
178
{
179
return createInstance(key, UBRK_SENTENCE, status);
180
}
181
182
// -------------------------------------
183
184
// Creates a break iterator for title casing breaks.
185
BreakIterator* U_EXPORT2
186
BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
187
{
188
return createInstance(key, UBRK_TITLE, status);
189
}
190
191
// -------------------------------------
192
193
// Gets all the available locales that has localized text boundary data.
194
const Locale* U_EXPORT2
195
BreakIterator::getAvailableLocales(int32_t& count)
196
{
197
return Locale::getAvailableLocales(count);
198
}
199
200
// ------------------------------------------
201
//
202
// Constructors, destructor and assignment operator
203
//
204
//-------------------------------------------
205
206
// Default constructor: performs no work of its own.
BreakIterator::BreakIterator() = default;
209
210
// Copy constructor: copies the valid, actual and requested locale IDs of
// `other` into this object. A failure while copying is only caught by
// U_ASSERT.
BreakIterator::BreakIterator(const BreakIterator &other)
    : UObject(other)
{
    UErrorCode copyStatus = U_ZERO_ERROR;
    U_LOCALE_BASED(localeIds, *this);
    localeIds.setLocaleIDs(other.validLocale, other.actualLocale, copyStatus);
    LocaleBased::setLocaleID(other.requestLocale, requestLocale, copyStatus);
    U_ASSERT(U_SUCCESS(copyStatus));
}
217
218
// Copy assignment: takes over the valid, actual and requested locale IDs of
// `other`. Self-assignment leaves the object untouched.
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this == &other) {
        return *this;
    }

    UErrorCode copyStatus = U_ZERO_ERROR;
    U_LOCALE_BASED(localeIds, *this);
    localeIds.setLocaleIDs(other.validLocale, other.actualLocale, copyStatus);
    LocaleBased::setLocaleID(other.requestLocale, requestLocale, copyStatus);
    U_ASSERT(U_SUCCESS(copyStatus));
    return *this;
}
228
229
// Destructor: frees the three locale ID objects held by this iterator.
BreakIterator::~BreakIterator()
{
    delete validLocale;
    delete actualLocale;
    delete requestLocale;
}
235
236
// ------------------------------------------
237
//
238
// Registration
239
//
240
//-------------------------------------------
241
#if !UCONFIG_NO_SERVICE
242
243
// -------------------------------------
244
245
class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
246
public:
247
virtual ~ICUBreakIteratorFactory();
248
protected:
249
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const override {
250
return BreakIterator::makeInstance(loc, kind, status);
251
}
252
};
253
254
ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
255
256
// -------------------------------------
257
258
class ICUBreakIteratorService : public ICULocaleService {
259
public:
260
ICUBreakIteratorService()
261
: ICULocaleService(UNICODE_STRING("Break Iterator", 14))
262
{
263
UErrorCode status = U_ZERO_ERROR;
264
registerFactory(new ICUBreakIteratorFactory(), status);
265
}
266
267
virtual ~ICUBreakIteratorService();
268
269
virtual UObject* cloneInstance(UObject* instance) const override {
270
return ((BreakIterator*)instance)->clone();
271
}
272
273
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const override {
274
LocaleKey& lkey = static_cast<LocaleKey&>(const_cast<ICUServiceKey&>(key));
275
int32_t kind = lkey.kind();
276
Locale loc;
277
lkey.currentLocale(loc);
278
return BreakIterator::makeInstance(loc, kind, status);
279
}
280
281
virtual UBool isDefault() const override {
282
return countFactories() == 1;
283
}
284
};
285
286
ICUBreakIteratorService::~ICUBreakIteratorService() {}
287
288
// -------------------------------------
289
290
// defined in ucln_cmn.h
291
U_NAMESPACE_END
292
293
// Service singleton; assigned in initService() and released in
// breakiterator_cleanup().
static icu::ICULocaleService* gService = nullptr;
// Init-once guard used by getService() to run initService().
static icu::UInitOnce gInitOnceBrkiter {};
295
296
297
298
/**
299
* Release all static memory held by breakiterator.
300
*/
301
U_CDECL_BEGIN
302
static UBool U_CALLCONV breakiterator_cleanup() {
303
#if !UCONFIG_NO_SERVICE
304
if (gService) {
305
delete gService;
306
gService = nullptr;
307
}
308
gInitOnceBrkiter.reset();
309
#endif
310
return true;
311
}
312
U_CDECL_END
313
U_NAMESPACE_BEGIN
314
315
static void U_CALLCONV
316
initService() {
317
gService = new ICUBreakIteratorService();
318
ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
319
}
320
321
// Returns the service singleton, running initService() through the init-once
// guard first. The result is whatever gService holds afterwards.
static ICULocaleService*
getService()
{
    umtx_initOnce(gInitOnceBrkiter, &initService);
    ICULocaleService* const service = gService;
    return service;
}
327
328
329
// -------------------------------------
330
331
// Reports whether a service is in use. While the init-once guard is still in
// its reset state the answer is false and getService() is not called.
static inline UBool
hasService()
{
    if (gInitOnceBrkiter.isReset()) {
        return false;
    }
    return getService() != nullptr;
}
336
337
// -------------------------------------
338
339
// Registers `toAdopt` with the break iterator service for the given locale
// and kind. If no service object is available, status is set to
// U_MEMORY_ALLOCATION_ERROR and nullptr is returned.
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
    if (ICULocaleService* const service = getService()) {
        return service->registerInstance(toAdopt, locale, kind, status);
    }

    status = U_MEMORY_ALLOCATION_ERROR;
    return nullptr;
}
349
350
// -------------------------------------
351
352
// Removes a registration from the break iterator service.
// Returns false without touching status if status already indicates failure.
// If no service exists, status becomes U_MEMORY_ALLOCATION_ERROR and false is
// returned.
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }

    if (!hasService()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return false;
    }

    return gService->unregister(key, status);
}
363
364
// -------------------------------------
365
366
// Returns the service's enumeration of available locales, or nullptr when no
// service object could be obtained.
StringEnumeration* U_EXPORT2
BreakIterator::getAvailableLocales()
{
    ICULocaleService* const service = getService();
    return service != nullptr ? service->getAvailableLocales() : nullptr;
}
375
#endif /* UCONFIG_NO_SERVICE */
376
377
// -------------------------------------
378
379
// Creates a break iterator of the given kind for a locale.
// When a service is in use the request goes through it; otherwise (or when
// service support is compiled out) makeInstance() is called directly.
BreakIterator*
BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return nullptr;
    }

#if !UCONFIG_NO_SERVICE
    if (hasService()) {
        Locale servedLocale("");
        BreakIterator* const served =
            static_cast<BreakIterator*>(gService->get(loc, kind, &servedLocale, status));
        // TODO: The way the service code works in ICU 2.8 is that a really
        // registered break iterator populates the locale passed to get(),
        // whereas the handleDefault path (nothing registered can handle the
        // requested locale) leaves it empty. In the latter case makeInstance
        // has already filled in the object's actual/valid locale data, so it
        // must not be touched here. The original action item stands: revisit
        // this and clean it up/fix it/remove it.
        if (U_SUCCESS(status) && served != nullptr) {
            if (*servedLocale.getName() != 0) {
                U_LOCALE_BASED(localeIds, *served);
                localeIds.setLocaleIDs(servedLocale.getName(), servedLocale.getName(), status);
            }
        }
        return served;
    }
#endif

    return makeInstance(loc, kind, status);
}
412
413
// -------------------------------------
414
enum { kKeyValueLenMax = 32 };
415
416
BreakIterator*
417
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
418
{
419
420
if (U_FAILURE(status)) {
421
return nullptr;
422
}
423
424
BreakIterator *result = nullptr;
425
switch (kind) {
426
case UBRK_CHARACTER:
427
{
428
UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
429
result = BreakIterator::buildInstance(loc, "grapheme", status);
430
UTRACE_EXIT_STATUS(status);
431
}
432
break;
433
case UBRK_WORD:
434
{
435
UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
436
result = BreakIterator::buildInstance(loc, "word", status);
437
UTRACE_EXIT_STATUS(status);
438
}
439
break;
440
case UBRK_LINE:
441
{
442
char lb_lw[kKeyValueLenMax];
443
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
444
uprv_strcpy(lb_lw, "line");
445
UErrorCode kvStatus = U_ZERO_ERROR;
446
auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
447
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
448
uprv_strcat(lb_lw, "_");
449
uprv_strcat(lb_lw, value.data());
450
}
451
// lw=phrase is only supported in Japanese and Korean
452
if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
453
value = loc.getKeywordValue<CharString>("lw", kvStatus);
454
if (U_SUCCESS(kvStatus) && value == "phrase") {
455
uprv_strcat(lb_lw, "_");
456
uprv_strcat(lb_lw, value.data());
457
}
458
}
459
result = BreakIterator::buildInstance(loc, lb_lw, status);
460
461
UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
462
UTRACE_EXIT_STATUS(status);
463
}
464
break;
465
case UBRK_SENTENCE:
466
{
467
UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
468
result = BreakIterator::buildInstance(loc, "sentence", status);
469
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
470
char ssKeyValue[kKeyValueLenMax] = {0};
471
UErrorCode kvStatus = U_ZERO_ERROR;
472
int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
473
if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) {
474
FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
475
if (U_SUCCESS(kvStatus)) {
476
result = fbiBuilder->build(result, status);
477
delete fbiBuilder;
478
}
479
}
480
#endif
481
UTRACE_EXIT_STATUS(status);
482
}
483
break;
484
case UBRK_TITLE:
485
{
486
UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
487
result = BreakIterator::buildInstance(loc, "title", status);
488
UTRACE_EXIT_STATUS(status);
489
}
490
break;
491
default:
492
status = U_ILLEGAL_ARGUMENT_ERROR;
493
}
494
495
if (U_FAILURE(status)) {
496
delete result;
497
return nullptr;
498
}
499
500
return result;
501
}
502
503
// Returns the locale of the requested type.
// On a failing status the root locale is returned. For ULOC_REQUESTED_LOCALE
// the stored request locale is used (root if none is stored); every other
// type is resolved by LocaleBased::getLocale().
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return Locale::getRoot();
    }

    if (type != ULOC_REQUESTED_LOCALE) {
        return LocaleBased::getLocale(validLocale, actualLocale, type, status);
    }

    if (requestLocale == nullptr) {
        return Locale::getRoot();
    }
    return Locale(requestLocale->data());
}
514
515
const char *
516
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
517
if (U_FAILURE(status)) {
518
return nullptr;
519
}
520
if (type == ULOC_REQUESTED_LOCALE) {
521
return requestLocale == nullptr ? "" : requestLocale->data();
522
}
523
return LocaleBased::getLocaleID(validLocale, actualLocale, type, status);
524
}
525
526
527
// This implementation of getRuleStatus is a do-nothing stub, here to
528
// provide a default implementation for any derived BreakIterator classes that
529
// do not implement it themselves.
530
int32_t BreakIterator::getRuleStatus() const {
531
return 0;
532
}
533
534
// This implementation of getRuleStatusVec is a do-nothing stub, here to
535
// provide a default implementation for any derived BreakIterator classes that
536
// do not implement it themselves.
537
int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
538
if (U_FAILURE(status)) {
539
return 0;
540
}
541
if (capacity < 1) {
542
status = U_BUFFER_OVERFLOW_ERROR;
543
return 1;
544
}
545
*fillInVec = 0;
546
return 1;
547
}
548
549
// Constructs an iterator whose valid and actual locale IDs are taken from the
// names of the two given locales. A failure while storing them is only caught
// by U_ASSERT.
BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
    UErrorCode initStatus = U_ZERO_ERROR;
    U_LOCALE_BASED(localeIds, (*this));
    localeIds.setLocaleIDs(valid.getName(), actual.getName(), initStatus);
    U_ASSERT(U_SUCCESS(initStatus));
}
555
556
U_NAMESPACE_END
557
558
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
559
560
//eof
561
562