Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/rbbi.h
38827 views
1
/*
2
***************************************************************************
3
* Copyright (C) 1999-2014 International Business Machines Corporation *
4
* and others. All rights reserved. *
5
***************************************************************************
6
7
**********************************************************************
8
* Date Name Description
9
* 10/22/99 alan Creation.
10
* 11/11/99 rgillam Complete port from Java.
11
**********************************************************************
12
*/
13
14
#ifndef RBBI_H
15
#define RBBI_H
16
17
#include "unicode/utypes.h"
18
19
/**
20
* \file
21
* \brief C++ API: Rule Based Break Iterator
22
*/
23
24
#if !UCONFIG_NO_BREAK_ITERATION
25
26
#include "unicode/brkiter.h"
27
#include "unicode/udata.h"
28
#include "unicode/parseerr.h"
29
#include "unicode/schriter.h"
30
#include "unicode/uchriter.h"
31
32
33
struct UTrie;
34
35
U_NAMESPACE_BEGIN
36
37
/** @internal */
38
struct RBBIDataHeader;
39
class RuleBasedBreakIteratorTables;
40
class BreakIterator;
41
class RBBIDataWrapper;
42
class UStack;
43
class LanguageBreakEngine;
44
class UnhandledEngine;
45
struct RBBIStateTable;
46
47
48
49
50
/**
51
*
52
* A subclass of BreakIterator whose behavior is specified using a list of rules.
53
* <p>Instances of this class are most commonly created by the factory methods of
54
* BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
55
* and then used via the abstract API in class BreakIterator</p>
56
*
57
* <p>See the ICU User Guide for information on Break Iterator Rules.</p>
58
*
59
* <p>This class is not intended to be subclassed. (Class DictionaryBasedBreakIterator
60
* is a subclass, but that relationship is effectively internal to the ICU
61
* implementation. The subclassing interface to RuleBasedBreakIterator is
62
* not part of the ICU API, and may not remain stable.)</p>
63
*
64
*/
65
class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator {
66
67
protected:
68
/**
69
* The UText through which this BreakIterator accesses the text
70
* @internal
71
*/
72
UText *fText;
73
74
/**
75
* A character iterator that refers to the same text as the UText, above.
76
* Only included for compatibility with old API, which was based on CharacterIterators.
77
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
78
*/
79
CharacterIterator *fCharIter;
80
81
/**
82
* When the input text is provided by a UnicodeString, this will point to
83
* a characterIterator that wraps that data. Needed only for the
84
* implementation of getText(), a backwards compatibility issue.
85
*/
86
StringCharacterIterator *fSCharIter;
87
88
/**
89
* When the input text is provided by a UText, this
90
* dummy CharacterIterator over an empty string will
91
* be returned from getText()
92
*/
93
UCharCharacterIterator *fDCharIter;
94
95
/**
96
* The rule data for this BreakIterator instance
97
* @internal
98
*/
99
RBBIDataWrapper *fData;
100
101
/** Index of the Rule {tag} values for the most recent match.
102
* @internal
103
*/
104
int32_t fLastRuleStatusIndex;
105
106
/**
107
* Rule tag value valid flag.
108
* Some iterator operations don't intrinsically set the correct tag value.
109
* This flag lets us lazily compute the value if we are ever asked for it.
110
* @internal
111
*/
112
UBool fLastStatusIndexValid;
113
114
/**
115
* Counter for the number of characters encountered with the "dictionary"
116
* flag set.
117
* @internal
118
*/
119
uint32_t fDictionaryCharCount;
120
121
/**
122
* When a range of characters is divided up using the dictionary, the break
123
* positions that are discovered are stored here, preventing us from having
124
* to use either the dictionary or the state table again until the iterator
125
* leaves this range of text. Has the most impact for line breaking.
126
* @internal
127
*/
128
int32_t* fCachedBreakPositions;
129
130
/**
131
* The number of elements in fCachedBreakPositions
132
* @internal
133
*/
134
int32_t fNumCachedBreakPositions;
135
136
/**
137
* if fCachedBreakPositions is not null, this indicates which item in the
138
* cache the current iteration position refers to
139
* @internal
140
*/
141
int32_t fPositionInCache;
142
143
/**
144
*
145
* If present, UStack of LanguageBreakEngine objects that might handle
146
* dictionary characters. Searched from top to bottom to find an object to
147
* handle a given character.
148
* @internal
149
*/
150
UStack *fLanguageBreakEngines;
151
152
/**
153
*
154
* If present, the special LanguageBreakEngine used for handling
155
* characters that are in the dictionary set, but not handled by any
156
* LanguageBreakEngine.
157
* @internal
158
*/
159
UnhandledEngine *fUnhandledBreakEngine;
160
161
/**
162
*
163
* The type of the break iterator, or -1 if it has not been set.
164
* @internal
165
*/
166
int32_t fBreakType;
167
168
protected:
169
//=======================================================================
170
// constructors
171
//=======================================================================
172
173
#ifndef U_HIDE_INTERNAL_API
174
/**
175
* Constant to be used in the constructor
176
* RuleBasedBreakIterator(const RBBIDataHeader*, EDontAdopt, UErrorCode &);
177
* which does not adopt the memory indicated by the RBBIDataHeader*
178
* parameter.
179
*
180
* @internal
181
*/
182
enum EDontAdopt {
183
kDontAdopt
184
};
185
186
/**
187
* Constructor from a flattened set of RBBI data in malloced memory.
188
* RuleBasedBreakIterators built from a custom set of rules
189
* are created via this constructor; the rules are compiled
190
* into memory, then the break iterator is constructed here.
191
*
192
* The break iterator adopts the memory, and will
193
* free it when done.
194
* @internal
195
*/
196
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
197
198
/**
199
* Constructor from a flattened set of RBBI data in memory which need not
200
* be malloced (e.g. it may be a memory-mapped file, etc.).
201
*
202
* This version does not adopt the memory, and does not
203
* free it when done.
204
* @internal
205
*/
206
RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
207
#endif /* U_HIDE_INTERNAL_API */
208
209
210
friend class RBBIRuleBuilder;
211
/** @internal */
212
friend class BreakIterator;
213
214
215
216
public:
217
218
/** Default constructor. Creates an empty shell of an iterator, with no
219
* rules or text to iterate over. Object can subsequently be assigned to.
220
* @stable ICU 2.2
221
*/
222
RuleBasedBreakIterator();
223
224
/**
225
* Copy constructor. Will produce a break iterator with the same behavior,
226
* and which iterates over the same text, as the one passed in.
227
* @param that The RuleBasedBreakIterator passed to be copied
228
* @stable ICU 2.0
229
*/
230
RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
231
232
/**
233
* Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
234
* @param rules The break rules to be used.
235
* @param parseError In the event of a syntax error in the rules, provides the location
236
* within the rules of the problem.
237
* @param status Information on any errors encountered.
238
* @stable ICU 2.2
239
*/
240
RuleBasedBreakIterator( const UnicodeString &rules,
241
UParseError &parseError,
242
UErrorCode &status);
243
244
/**
245
* Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
246
* Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
247
* Construction of a break iterator in this way is substantially faster than
248
* construction from source rules.
249
*
250
* Ownership of the storage containing the compiled rules remains with the
251
* caller of this function. The compiled rules must not be modified or
252
* deleted during the life of the break iterator.
253
*
254
* The compiled rules are not compatible across different major versions of ICU.
255
* The compiled rules are compatible only between machines with the same
256
* byte ordering (little or big endian) and the same base character set family
257
* (ASCII or EBCDIC).
258
*
259
* @see #getBinaryRules
260
* @param compiledRules A pointer to the compiled break rules to be used.
261
* @param ruleLength The length of the compiled break rules, in bytes. This
262
* corresponds to the length value produced by getBinaryRules().
263
* @param status Information on any errors encountered, including invalid
264
* binary rules.
265
* @stable ICU 4.8
266
*/
267
RuleBasedBreakIterator(const uint8_t *compiledRules,
268
uint32_t ruleLength,
269
UErrorCode &status);
270
271
/**
272
* This constructor uses the udata interface to create a BreakIterator
273
* whose internal tables live in a memory-mapped file. "image" is an
274
* ICU UDataMemory handle for the pre-compiled break iterator tables.
275
* @param image handle to the memory image for the break iterator data.
276
* Ownership of the UDataMemory handle passes to the Break Iterator,
277
* which will be responsible for closing it when it is no longer needed.
278
* @param status Information on any errors encountered.
279
* @see udata_open
280
* @see #getBinaryRules
281
* @stable ICU 2.8
282
*/
283
RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
284
285
/**
286
* Destructor
287
* @stable ICU 2.0
288
*/
289
virtual ~RuleBasedBreakIterator();
290
291
/**
292
* Assignment operator. Sets this iterator to have the same behavior,
293
* and iterate over the same text, as the one passed in.
294
* @param that The RuleBasedBreakIterator passed in
295
* @return the newly created RuleBasedBreakIterator
296
* @stable ICU 2.0
297
*/
298
RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
299
300
/**
301
* Equality operator. Returns TRUE if both BreakIterators are of the
302
* same class, have the same behavior, and iterate over the same text.
303
* @param that The BreakIterator to be compared for equality
304
* @return TRUE if both BreakIterators are of the
305
* same class, have the same behavior, and iterate over the same text.
306
* @stable ICU 2.0
307
*/
308
virtual UBool operator==(const BreakIterator& that) const;
309
310
/**
311
* Not-equal operator. If operator== returns TRUE, this returns FALSE,
312
* and vice versa.
313
* @param that The BreakIterator to be compared for inequality
314
* @return TRUE if both BreakIterators are not same.
315
* @stable ICU 2.0
316
*/
317
UBool operator!=(const BreakIterator& that) const;
318
319
/**
320
* Returns a newly-constructed RuleBasedBreakIterator with the same
321
* behavior, and iterating over the same text, as this one.
322
* Differs from the copy constructor in that it is polymorphic, and
323
* will correctly clone (copy) a derived class.
324
* clone() is thread safe. Multiple threads may simultaneously
325
* clone the same source break iterator.
326
* @return a newly-constructed RuleBasedBreakIterator
327
* @stable ICU 2.0
328
*/
329
virtual BreakIterator* clone() const;
330
331
/**
332
* Compute a hash code for this BreakIterator
333
* @return A hash code
334
* @stable ICU 2.0
335
*/
336
virtual int32_t hashCode(void) const;
337
338
/**
339
* Returns the description used to create this iterator
340
* @return the description used to create this iterator
341
* @stable ICU 2.0
342
*/
343
virtual const UnicodeString& getRules(void) const;
344
345
//=======================================================================
346
// BreakIterator overrides
347
//=======================================================================
348
349
/**
350
* <p>
351
* Return a CharacterIterator over the text being analyzed.
352
* The returned character iterator is owned by the break iterator, and must
353
* not be deleted by the caller. Repeated calls to this function may
354
* return the same CharacterIterator.
355
* </p>
356
* <p>
357
* The returned character iterator must not be used concurrently with
358
* the break iterator. If concurrent operation is needed, clone the
359
* returned character iterator first and operate on the clone.
360
* </p>
361
* <p>
362
* When the break iterator is operating on text supplied via a UText,
363
* this function will fail. Lacking any way to signal failures, it
364
* returns a CharacterIterator containing no text.
365
* The function getUText() provides similar functionality,
366
* is reliable, and is more efficient.
367
* </p>
368
*
369
* TODO: deprecate this function?
370
*
371
* @return An iterator over the text being analyzed.
372
* @stable ICU 2.0
373
*/
374
virtual CharacterIterator& getText(void) const;
375
376
377
/**
378
* Get a UText for the text being analyzed.
379
* The returned UText is a shallow clone of the UText used internally
380
* by the break iterator implementation. It can safely be used to
381
* access the text without impacting any break iterator operations,
382
* but the underlying text itself must not be altered.
383
*
384
* @param fillIn A UText to be filled in. If NULL, a new UText will be
385
* allocated to hold the result.
386
* @param status receives any error codes.
387
* @return The current UText for this break iterator. If an input
388
* UText was provided, it will always be returned.
389
* @stable ICU 3.4
390
*/
391
virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
392
393
/**
394
* Set the iterator to analyze a new piece of text. This function resets
395
* the current iteration position to the beginning of the text.
396
* @param newText An iterator over the text to analyze. The BreakIterator
397
* takes ownership of the character iterator. The caller MUST NOT delete it!
398
* @stable ICU 2.0
399
*/
400
virtual void adoptText(CharacterIterator* newText);
401
402
/**
403
* Set the iterator to analyze a new piece of text. This function resets
404
* the current iteration position to the beginning of the text.
405
* @param newText The text to analyze.
406
* @stable ICU 2.0
407
*/
408
virtual void setText(const UnicodeString& newText);
409
410
/**
411
* Reset the break iterator to operate over the text represented by
412
* the UText. The iterator position is reset to the start.
413
*
414
* This function makes a shallow clone of the supplied UText. This means
415
* that the caller is free to immediately close or otherwise reuse the
416
* UText that was passed as a parameter, but that the underlying text itself
417
* must not be altered while being referenced by the break iterator.
418
*
419
* @param text The UText used to change the text.
420
* @param status Receives any error codes.
421
* @stable ICU 3.4
422
*/
423
virtual void setText(UText *text, UErrorCode &status);
424
425
/**
426
* Sets the current iteration position to the beginning of the text, position zero.
427
* @return The offset of the beginning of the text, zero.
428
* @stable ICU 2.0
429
*/
430
virtual int32_t first(void);
431
432
/**
433
* Sets the current iteration position to the end of the text.
434
* @return The text's past-the-end offset.
435
* @stable ICU 2.0
436
*/
437
virtual int32_t last(void);
438
439
/**
440
* Advances the iterator either forward or backward the specified number of steps.
441
* Negative values move backward, and positive values move forward. This is
442
* equivalent to repeatedly calling next() or previous().
443
* @param n The number of steps to move. The sign indicates the direction
444
* (negative is backwards, and positive is forwards).
445
* @return The character offset of the boundary position n boundaries away from
446
* the current one.
447
* @stable ICU 2.0
448
*/
449
virtual int32_t next(int32_t n);
450
451
/**
452
* Advances the iterator to the next boundary position.
453
* @return The position of the first boundary after this one.
454
* @stable ICU 2.0
455
*/
456
virtual int32_t next(void);
457
458
/**
459
* Moves the iterator backwards, to the last boundary preceding this one.
460
* @return The position of the last boundary position preceding this one.
461
* @stable ICU 2.0
462
*/
463
virtual int32_t previous(void);
464
465
/**
466
* Sets the iterator to refer to the first boundary position following
467
* the specified position.
468
* @param offset The position from which to begin searching for a break position.
469
* @return The position of the first break after the current position.
470
* @stable ICU 2.0
471
*/
472
virtual int32_t following(int32_t offset);
473
474
/**
475
* Sets the iterator to refer to the last boundary position before the
476
* specified position.
477
* @param offset The position to begin searching for a break from.
478
* @return The position of the last boundary before the starting position.
479
* @stable ICU 2.0
480
*/
481
virtual int32_t preceding(int32_t offset);
482
483
/**
484
* Returns true if the specified position is a boundary position. As a side
485
* effect, leaves the iterator pointing to the first boundary position at
486
* or after "offset".
487
* @param offset the offset to check.
488
* @return True if "offset" is a boundary position.
489
* @stable ICU 2.0
490
*/
491
virtual UBool isBoundary(int32_t offset);
492
493
/**
494
* Returns the current iteration position.
495
* @return The current iteration position.
496
* @stable ICU 2.0
497
*/
498
virtual int32_t current(void) const;
499
500
501
/**
502
* Return the status tag from the break rule that determined the most recently
503
* returned break position. For break rules that do not specify a
504
* status, a default value of 0 is returned. If more than one break rule
505
* would cause a boundary to be located at some position in the text,
506
* the numerically largest of the applicable status values is returned.
507
* <p>
508
* Of the standard types of ICU break iterators, only word break and
509
* line break provide status values. The values are defined in
510
* the header file ubrk.h. For Word breaks, the status allows distinguishing between words
511
* that contain alphabetic letters, "words" that appear to be numbers,
512
* punctuation and spaces, words containing ideographic characters, and
513
* more. For Line Break, the status distinguishes between hard (mandatory) breaks
514
* and soft (potential) break positions.
515
* <p>
516
* <code>getRuleStatus()</code> can be called after obtaining a boundary
517
* position from <code>next()</code>, <code>previous()</code>, or
518
* any other break iterator function that returns a boundary position.
519
* <p>
520
* When creating custom break rules, one is free to define whatever
521
* status values may be convenient for the application.
522
* <p>
523
* Note: this function is not thread safe. It should not have been
524
* declared const, and the const remains only for compatibility
525
* reasons. (The function is logically const, but not bit-wise const).
526
* <p>
527
* @return the status from the break rule that determined the most recently
528
* returned break position.
529
*
530
* @see UWordBreak
531
* @stable ICU 2.2
532
*/
533
virtual int32_t getRuleStatus() const;
534
535
/**
536
* Get the status (tag) values from the break rule(s) that determined the most
537
* recently returned break position.
538
* <p>
539
* The returned status value(s) are stored into an array provided by the caller.
540
* The values are stored in sorted (ascending) order.
541
* If the capacity of the output array is insufficient to hold the data,
542
* the output will be truncated to the available length, and a
543
* U_BUFFER_OVERFLOW_ERROR will be signaled.
544
*
545
* @param fillInVec an array to be filled in with the status values.
546
* @param capacity the length of the supplied vector. A length of zero causes
547
* the function to return the number of status values, in the
548
* normal way, without attempting to store any values.
549
* @param status receives error codes.
550
* @return The number of rule status values from rules that determined
551
* the most recent boundary returned by the break iterator.
552
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
553
* is the total number of status values that were available,
554
* not the reduced number that were actually returned.
555
* @see getRuleStatus
556
* @stable ICU 3.0
557
*/
558
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
559
560
/**
561
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
562
* This method is to implement a simple version of RTTI, since not all
563
* C++ compilers support genuine RTTI. Polymorphic operator==() and
564
* clone() methods call this method.
565
*
566
* @return The class ID for this object. All objects of a
567
* given class have the same class ID. Objects of
568
* other classes have different class IDs.
569
* @stable ICU 2.0
570
*/
571
virtual UClassID getDynamicClassID(void) const;
572
573
/**
574
* Returns the class ID for this class. This is useful only for
575
* comparing to a return value from getDynamicClassID(). For example:
576
*
577
* Base* polymorphic_pointer = createPolymorphicObject();
578
* if (polymorphic_pointer->getDynamicClassID() ==
579
* Derived::getStaticClassID()) ...
580
*
581
* @return The class ID for all objects of this class.
582
* @stable ICU 2.0
583
*/
584
static UClassID U_EXPORT2 getStaticClassID(void);
585
586
/**
587
* Deprecated functionality. Use clone() instead.
588
*
589
* Create a clone (copy) of this break iterator in memory provided
590
* by the caller. The idea is to increase performance by avoiding
591
* a storage allocation. Use of this function is NOT RECOMMENDED.
592
* Performance gains are minimal, and correct buffer management is
593
* tricky. Use clone() instead.
594
*
595
* @param stackBuffer The pointer to the memory into which the cloned object
596
* should be placed. If NULL, allocate heap memory
597
* for the cloned object.
598
* @param BufferSize The size of the buffer. If zero, return the required
599
* buffer size, but do not clone the object. If the
600
* size was too small (but not zero), allocate heap
601
* storage for the cloned object.
602
*
603
* @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
604
* returned if the provided buffer was too small, and
605
* the clone was therefore put on the heap.
606
*
607
* @return Pointer to the clone object. This may differ from the stackBuffer
608
* address if the byte alignment of the stack buffer was not suitable
609
* or if the stackBuffer was too small to hold the clone.
610
* @deprecated ICU 52. Use clone() instead.
611
*/
612
virtual BreakIterator * createBufferClone(void *stackBuffer,
613
int32_t &BufferSize,
614
UErrorCode &status);
615
616
617
/**
618
* Return the binary form of compiled break rules,
619
* which can then be used to create a new break iterator at some
620
* time in the future. Creating a break iterator from pre-compiled rules
621
* is much faster than building one from the source form of the
622
* break rules.
623
*
624
* The binary data can only be used with the same version of ICU
625
* and on the same platform type (processor endian-ness)
626
*
627
* @param length Returns the length of the binary data. (Out parameter.)
628
*
629
* @return A pointer to the binary (compiled) rule data. The storage
630
* belongs to the RuleBasedBreakIterator object, not the
631
* caller, and must not be modified or deleted.
632
* @stable ICU 4.8
633
*/
634
virtual const uint8_t *getBinaryRules(uint32_t &length);
635
636
/**
637
* Set the subject text string upon which the break iterator is operating
638
* without changing any other aspect of the matching state.
639
* The new and previous text strings must have the same content.
640
*
641
* This function is intended for use in environments where ICU is operating on
642
* strings that may move around in memory. It provides a mechanism for notifying
643
* ICU that the string has been relocated, and providing a new UText to access the
644
* string in its new position.
645
*
646
* Note that the break iterator implementation never copies the underlying text
647
* of a string being processed, but always operates directly on the original text
648
* provided by the user. Refreshing simply drops the references to the old text
649
* and replaces them with references to the new.
650
*
651
* Caution: this function is normally used only by very specialized,
652
* system-level code. One example use case is with garbage collection that moves
653
* the text in memory.
654
*
655
* @param input The new (moved) text string.
656
* @param status Receives errors detected by this function.
657
* @return *this
658
*
659
* @stable ICU 49
660
*/
661
virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
662
663
664
protected:
665
//=======================================================================
666
// implementation
667
//=======================================================================
668
/**
669
* Dumps caches and performs other actions associated with a complete change
670
* in text or iteration position.
671
* @internal
672
*/
673
virtual void reset(void);
674
675
#if 0
676
/**
677
* Return true if the category lookup for this char
678
* indicates that it is in the set of dictionary lookup chars.
679
* This function is intended for use by dictionary based break iterators.
680
* @return true if the category lookup for this char
681
* indicates that it is in the set of dictionary lookup chars.
682
* @internal
683
*/
684
virtual UBool isDictionaryChar(UChar32);
685
686
/**
687
* Get the type of the break iterator.
688
* @internal
689
*/
690
virtual int32_t getBreakType() const;
691
#endif
692
693
/**
694
* Set the type of the break iterator.
695
* @internal
696
*/
697
virtual void setBreakType(int32_t type);
698
699
#ifndef U_HIDE_INTERNAL_API
700
/**
701
* Common initialization function, used by constructors and bufferClone.
702
* @internal
703
*/
704
void init();
705
#endif /* U_HIDE_INTERNAL_API */
706
707
private:
708
709
/**
710
* This method backs the iterator back up to a "safe position" in the text.
711
* This is a position that we know, without any context, must be a break position.
712
* The various calling methods then iterate forward from this safe position to
713
* the appropriate position to return. (For more information, see the description
714
* of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
715
* @param statetable state table used for moving backwards
716
* @internal
717
*/
718
int32_t handlePrevious(const RBBIStateTable *statetable);
719
720
/**
721
* This method is the actual implementation of the next() method. All iteration
722
* vectors through here. This method initializes the state machine to state 1
723
* and advances through the text character by character until we reach the end
724
* of the text or the state machine transitions to state 0. We update our return
725
* value every time the state machine passes through a possible end state.
726
* @param statetable state table used for moving forwards
727
* @internal
728
*/
729
int32_t handleNext(const RBBIStateTable *statetable);
730
731
protected:
732
733
#ifndef U_HIDE_INTERNAL_API
734
/**
735
* This is the function that actually implements dictionary-based
736
* breaking. Covering at least the range from startPos to endPos,
737
* it checks for dictionary characters, and if it finds them determines
738
* the appropriate object to deal with them. It may cache found breaks in
739
* fCachedBreakPositions as it goes. It may well also look at text outside
740
* the range startPos to endPos.
741
* If going forward, endPos is the normal Unicode break result, and
742
* if going in reverse, startPos is the normal Unicode break result.
743
* @param startPos The start position of a range of text
744
* @param endPos The end position of a range of text
745
* @param reverse The call is for the reverse direction
746
* @internal
747
*/
748
int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
749
#endif /* U_HIDE_INTERNAL_API */
750
751
private:
752
753
/**
754
* This function returns the appropriate LanguageBreakEngine for a
755
* given character c.
756
* @param c A character in the dictionary set
757
* @internal
758
*/
759
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
760
761
/**
762
* @internal
763
*/
764
void makeRuleStatusValid();
765
766
};
767
768
//------------------------------------------------------------------------------
769
//
770
// Inline Functions Definitions ...
771
//
772
//------------------------------------------------------------------------------
773
774
/**
 * Inequality is defined purely as the logical negation of operator==(),
 * so the two comparisons can never disagree.
 */
inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
    const UBool sameIterator = this->operator==(that);
    return !sameIterator;
}
777
778
U_NAMESPACE_END
779
780
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
781
782
#endif
783
784