Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/icucommon/brkiter.cpp
12343 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1997-2015, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
*******************************************************************************
8
*
9
* File brkiter.cpp
10
*
11
* Modification History:
12
*
13
* Date Name Description
14
* 02/18/97 aliu Converted from OpenClass. Added DONE.
15
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
16
*****************************************************************************************
17
*/
18
19
// *****************************************************************************
20
// This file was generated from the java source file BreakIterator.java
21
// *****************************************************************************
22
23
#include "unicode/utypes.h"
24
25
#if !UCONFIG_NO_BREAK_ITERATION
26
27
#include "unicode/rbbi.h"
28
#include "unicode/brkiter.h"
29
#include "unicode/udata.h"
30
#include "unicode/ures.h"
31
#include "unicode/ustring.h"
32
#include "unicode/filteredbrk.h"
33
#include "bytesinkutil.h"
34
#include "ucln_cmn.h"
35
#include "cstring.h"
36
#include "umutex.h"
37
#include "servloc.h"
38
#include "locbased.h"
39
#include "uresimp.h"
40
#include "uassert.h"
41
#include "ubrkimpl.h"
42
#include "utracimp.h"
43
#include "charstr.h"
44
45
// *****************************************************************************
46
// class BreakIterator
47
// This class implements methods for finding the location of boundaries in text.
48
// Instances of BreakIterator maintain a current position and scan over text
49
// returning the index of characters where boundaries occur.
50
// *****************************************************************************
51
52
U_NAMESPACE_BEGIN
53
54
// -------------------------------------
55
56
// Builds a RuleBasedBreakIterator for a locale from the break-iterator data.
//
// The locale's break-iterator resource bundle is opened, the entry
// "boundaries"/<type> is read to obtain the name of the rules data item
// (expected in the form "name.ext"), that item is opened with udata_open(),
// and the resulting data is handed to a new RuleBasedBreakIterator.
//
// loc     Locale whose break-iterator bundle is consulted.
// type    Key looked up under "boundaries". Whether it contains the substring
//         "phrase" is passed as the second RuleBasedBreakIterator
//         constructor argument.
// status  In/out error code. Nothing is done if it already indicates failure.
//         Set to U_BUFFER_OVERFLOW_ERROR if the file name or its extension
//         does not fit the local buffers, and to U_MEMORY_ALLOCATION_ERROR
//         if the iterator could not be allocated.
//
// Returns the new iterator, or NULL on any failure.
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
{
    // Both buffers start out as empty strings so they are never read
    // uninitialised on the error paths below.
    char fnbuff[256] = {'\0'};
    char ext[4] = {'\0'};
    CharString actualLocale;
    // Initialised because ures_getString() may leave it untouched on failure,
    // yet it is inspected right after the call.
    int32_t size = 0;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;

    if (U_FAILURE(status))
        return NULL;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);

            // Split "name.ext" at the first '.' (U+002E).
            const UChar* extStart = u_strchr(brkfname, 0x002e);
            int32_t len = 0;
            if (extStart != NULL) {
                // Copy only the characters the extension really has, so a
                // short extension is not over-read and a long one cannot
                // leave ext without a terminator.
                const int32_t extLen = u_strlen(extStart + 1);
                if (extLen >= (int32_t)sizeof(ext)) {
                    if (U_SUCCESS(status)) {
                        status = U_BUFFER_OVERFLOW_ERROR;
                    }
                } else {
                    len = (int32_t)(extStart - brkfname);
                    u_UCharsToChars(extStart + 1, ext, extLen);
                    ext[extLen] = 0; // nul terminate
                    u_UCharsToChars(brkfname, fnbuff, len);
                }
            }
            fnbuff[len] = 0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
                              actualLocale.data());
    }

    ures_close(b);

    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }

    if (result == NULL) {
        // No iterator was created to take the data, so close it here.
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
144
145
// Creates a break iterator for word breaks.
146
BreakIterator* U_EXPORT2
147
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
148
{
149
return createInstance(key, UBRK_WORD, status);
150
}
151
152
// -------------------------------------
153
154
// Creates a break iterator for line breaks.
155
BreakIterator* U_EXPORT2
156
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
157
{
158
return createInstance(key, UBRK_LINE, status);
159
}
160
161
// -------------------------------------
162
163
// Creates a break iterator for character breaks.
164
BreakIterator* U_EXPORT2
165
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
166
{
167
return createInstance(key, UBRK_CHARACTER, status);
168
}
169
170
// -------------------------------------
171
172
// Creates a break iterator for sentence breaks.
173
BreakIterator* U_EXPORT2
174
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
175
{
176
return createInstance(key, UBRK_SENTENCE, status);
177
}
178
179
// -------------------------------------
180
181
// Creates a break iterator for title casing breaks.
182
BreakIterator* U_EXPORT2
183
BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
184
{
185
return createInstance(key, UBRK_TITLE, status);
186
}
187
188
// -------------------------------------
189
190
// Gets all the available locales that has localized text boundary data.
191
const Locale* U_EXPORT2
192
BreakIterator::getAvailableLocales(int32_t& count)
193
{
194
return Locale::getAvailableLocales(count);
195
}
196
197
// ------------------------------------------
198
//
199
// Constructors, destructor and assignment operator
200
//
201
//-------------------------------------------
202
203
// Default constructor: both stored locale ID strings start out empty.
BreakIterator::BreakIterator()
{
    actualLocale[0] = 0;
    validLocale[0] = 0;
}
207
208
// Copy constructor: copies the valid and actual locale ID buffers from `other`.
BreakIterator::BreakIterator(const BreakIterator &other)
    : UObject(other)
{
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
}
212
213
// Copy assignment: copies the valid and actual locale ID buffers from `other`.
// Self-assignment leaves the object untouched.
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this == &other) {
        return *this;
    }
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
    return *this;
}
220
221
// Destructor: nothing to release at this level.
BreakIterator::~BreakIterator() {}
224
225
// ------------------------------------------
226
//
227
// Registration
228
//
229
//-------------------------------------------
230
#if !UCONFIG_NO_SERVICE
231
232
// -------------------------------------
233
234
class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
235
public:
236
virtual ~ICUBreakIteratorFactory();
237
protected:
238
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const override {
239
return BreakIterator::makeInstance(loc, kind, status);
240
}
241
};
242
243
ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
244
245
// -------------------------------------
246
247
class ICUBreakIteratorService : public ICULocaleService {
248
public:
249
ICUBreakIteratorService()
250
: ICULocaleService(UNICODE_STRING("Break Iterator", 14))
251
{
252
UErrorCode status = U_ZERO_ERROR;
253
registerFactory(new ICUBreakIteratorFactory(), status);
254
}
255
256
virtual ~ICUBreakIteratorService();
257
258
virtual UObject* cloneInstance(UObject* instance) const override {
259
return ((BreakIterator*)instance)->clone();
260
}
261
262
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const override {
263
LocaleKey& lkey = (LocaleKey&)key;
264
int32_t kind = lkey.kind();
265
Locale loc;
266
lkey.currentLocale(loc);
267
return BreakIterator::makeInstance(loc, kind, status);
268
}
269
270
virtual UBool isDefault() const override {
271
return countFactories() == 1;
272
}
273
};
274
275
ICUBreakIteratorService::~ICUBreakIteratorService() {}
276
277
// -------------------------------------
278
279
// defined in ucln_cmn.h
280
U_NAMESPACE_END
281
282
// Guard passed to umtx_initOnce() so the service is initialised only once.
static icu::UInitOnce gInitOnceBrkiter {};

// The break iterator service; null until initService() has run.
static icu::ICULocaleService* gService = nullptr;
284
285
286
287
/**
288
* Release all static memory held by breakiterator.
289
*/
290
U_CDECL_BEGIN
291
static UBool U_CALLCONV breakiterator_cleanup(void) {
#if !UCONFIG_NO_SERVICE
    // Deleting a null pointer is a no-op, so no explicit null test is needed.
    delete gService;
    gService = nullptr;
    // Reset the once-guard so the service can be initialised again later.
    gInitOnceBrkiter.reset();
#endif
    return true;
}
301
U_CDECL_END
302
U_NAMESPACE_BEGIN
303
304
static void U_CALLCONV
305
initService(void) {
306
gService = new ICUBreakIteratorService();
307
ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
308
}
309
310
// Returns the break iterator service, running initService() through the
// once-guard first. The returned pointer is whatever gService holds afterwards.
static ICULocaleService*
getService()
{
    umtx_initOnce(gInitOnceBrkiter, &initService);
    ICULocaleService *service = gService;
    return service;
}
316
317
318
// -------------------------------------
319
320
// Reports whether a service is available. While the once-guard is still in
// its reset state this returns false without calling getService().
static inline UBool
hasService()
{
    if (gInitOnceBrkiter.isReset()) {
        return false;
    }
    return getService() != nullptr;
}
325
326
// -------------------------------------
327
328
// Registers `toAdopt` with the break iterator service for the given locale
// and kind, and returns the key produced by the service.
//
// toAdopt  Iterator handed over to this call for adoption.
// locale   Locale the iterator is registered for.
// kind     Break iterator type the iterator is registered for.
// status   In/out error code; set to U_MEMORY_ALLOCATION_ERROR when the
//          service is unavailable.
//
// Returns the registry key, or NULL when the service is unavailable.
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
    ICULocaleService *service = getService();
    if (service == NULL) {
        // Nothing can take ownership of the adopted iterator on this path,
        // so release it here instead of leaking it.
        delete toAdopt;
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    return service->registerInstance(toAdopt, locale, kind, status);
}
338
339
// -------------------------------------
340
341
// Removes the registration identified by `key`.
// Returns false without touching anything when `status` already indicates
// failure. When no service is available, sets status to
// U_MEMORY_ALLOCATION_ERROR and returns false. Otherwise returns the
// service's own result.
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }
    if (!hasService()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return false;
    }
    return gService->unregister(key, status);
}
352
353
// -------------------------------------
354
355
// Returns the service's enumeration of available locales, or null when the
// service could not be obtained.
StringEnumeration* U_EXPORT2
BreakIterator::getAvailableLocales()
{
    ICULocaleService *service = getService();
    return (service != nullptr) ? service->getAvailableLocales() : nullptr;
}
364
#endif /* UCONFIG_NO_SERVICE */
365
366
// -------------------------------------
367
368
// Creates a break iterator of the given kind for `loc`.
// When a service exists the request goes through it; otherwise the iterator
// is built directly with makeInstance(). Returns null if `status` already
// indicates failure on entry.
BreakIterator*
BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return nullptr;
    }

#if !UCONFIG_NO_SERVICE
    if (hasService()) {
        Locale reportedLoc("");
        BreakIterator *iter =
            static_cast<BreakIterator*>(gService->get(loc, kind, &reportedLoc, status));
        // TODO (carried over from the original code): as of ICU 2.8 the
        // service fills in the reported locale only when a really registered
        // break iterator handled the request. If the handleDefault path was
        // taken instead (nothing registered can handle the locale), the
        // reported locale comes back empty and the object already carries the
        // actual/valid locale data set by makeInstance, so it is left alone.
        // The original authors flagged this for a revisit/cleanup in ICU 3.0.
        if (U_SUCCESS(status) && iter != nullptr && reportedLoc.getName()[0] != 0) {
            U_LOCALE_BASED(localeIds, *iter);
            localeIds.setLocaleIDs(reportedLoc.getName(), reportedLoc.getName());
        }
        return iter;
    }
#endif
    return makeInstance(loc, kind, status);
}
401
402
// -------------------------------------
403
enum { kKeyValueLenMax = 32 };
404
405
// Creates a break iterator of the requested kind for `loc` by choosing the
// rules type name and calling buildInstance().
//
// loc     Locale to build for. For UBRK_LINE the "lb" keyword (strict,
//         normal, loose) and, for language "ja" only, the "lw" keyword
//         (phrase) are appended to the type name. For UBRK_SENTENCE the
//         keyword value ss=standard wraps the iterator with a filtered break
//         iterator, unless UCONFIG_NO_FILTERED_BREAK_ITERATION is set.
// kind    One of UBRK_CHARACTER, UBRK_WORD, UBRK_LINE, UBRK_SENTENCE,
//         UBRK_TITLE; any other value sets U_ILLEGAL_ARGUMENT_ERROR.
// status  In/out error code. Nothing is done if it already indicates failure.
//
// Returns the new iterator, or NULL whenever `status` indicates failure.
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{

    if (U_FAILURE(status)) {
        return NULL;
    }

    BreakIterator *result = NULL;
    switch (kind) {
    case UBRK_CHARACTER:
        {
            UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
            result = BreakIterator::buildInstance(loc, "grapheme", status);
            UTRACE_EXIT_STATUS(status);
        }
        break;
    case UBRK_WORD:
        {
            UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
            result = BreakIterator::buildInstance(loc, "word", status);
            UTRACE_EXIT_STATUS(status);
        }
        break;
    case UBRK_LINE:
        {
            // Holds "line" plus the optional "_<lb>" and "_<lw>" suffixes.
            char lb_lw[kKeyValueLenMax];
            UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
            uprv_strcpy(lb_lw, "line");
            UErrorCode kvStatus = U_ZERO_ERROR;
            CharString value;
            CharStringByteSink valueSink(&value);
            loc.getKeywordValue("lb", valueSink, kvStatus);
            if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
                uprv_strcat(lb_lw, "_");
                uprv_strcat(lb_lw, value.data());
            }
            // lw=phrase is only supported in Japanese.
            if (uprv_strcmp(loc.getLanguage(), "ja") == 0) {
                value.clear();
                // The two keyword lookups are independent: a failure while
                // reading "lb" must not suppress the "lw" lookup.
                kvStatus = U_ZERO_ERROR;
                loc.getKeywordValue("lw", valueSink, kvStatus);
                if (U_SUCCESS(kvStatus) && value == "phrase") {
                    uprv_strcat(lb_lw, "_");
                    uprv_strcat(lb_lw, value.data());
                }
            }
            result = BreakIterator::buildInstance(loc, lb_lw, status);

            UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
            UTRACE_EXIT_STATUS(status);
        }
        break;
    case UBRK_SENTENCE:
        {
            UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
            result = BreakIterator::buildInstance(loc, "sentence", status);
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
            char ssKeyValue[kKeyValueLenMax] = {0};
            UErrorCode kvStatus = U_ZERO_ERROR;
            int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
            if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) {
                FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
                // Only use the builder if one was actually produced.
                if (U_SUCCESS(kvStatus) && fbiBuilder != NULL) {
                    result = fbiBuilder->build(result, status);
                }
                // Released on every path; deleting NULL is a no-op.
                delete fbiBuilder;
            }
#endif
            UTRACE_EXIT_STATUS(status);
        }
        break;
    case UBRK_TITLE:
        {
            UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
            result = BreakIterator::buildInstance(loc, "title", status);
            UTRACE_EXIT_STATUS(status);
        }
        break;
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }

    if (U_FAILURE(status)) {
        return NULL;
    }

    return result;
}
493
494
// Returns the locale of the requested type, as reported by the locale-based
// helper constructed over this object.
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
{
    U_LOCALE_BASED(localeIds, *this);
    return localeIds.getLocale(type, status);
}
499
500
const char *
501
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
502
U_LOCALE_BASED(locBased, *this);
503
return locBased.getLocaleID(type, status);
504
}
505
506
507
// This implementation of getRuleStatus is a do-nothing stub, here to
508
// provide a default implementation for any derived BreakIterator classes that
509
// do not implement it themselves.
510
int32_t BreakIterator::getRuleStatus() const {
511
return 0;
512
}
513
514
// This implementation of getRuleStatusVec is a do-nothing stub, here to
515
// provide a default implementation for any derived BreakIterator classes that
516
// do not implement it themselves.
517
int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
518
if (U_FAILURE(status)) {
519
return 0;
520
}
521
if (capacity < 1) {
522
status = U_BUFFER_OVERFLOW_ERROR;
523
return 1;
524
}
525
*fillInVec = 0;
526
return 1;
527
}
528
529
// Constructs a break iterator whose locale IDs are set from the given valid
// and actual locales.
BreakIterator::BreakIterator(const Locale& valid, const Locale& actual)
{
    U_LOCALE_BASED(localeIds, (*this));
    localeIds.setLocaleIDs(valid, actual);
}
533
534
U_NAMESPACE_END
535
536
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
537
538
//eof
539
540