Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/chariter.h
38827 views
1
/*
2
********************************************************************
3
*
4
* Copyright (C) 1997-2011, International Business Machines
5
* Corporation and others. All Rights Reserved.
6
*
7
********************************************************************
8
*/
9
10
#ifndef CHARITER_H
11
#define CHARITER_H
12
13
#include "unicode/utypes.h"
14
#include "unicode/uobject.h"
15
#include "unicode/unistr.h"
16
/**
17
* \file
18
* \brief C++ API: Character Iterator
19
*/
20
21
U_NAMESPACE_BEGIN
22
/**
23
* Abstract class that defines an API for forward-only iteration
24
* on text objects.
25
* This is a minimal interface for iteration without random access
26
* or backwards iteration. It is especially useful for wrapping
27
* streams with converters into an object for collation or
28
* normalization.
29
*
30
* <p>Characters can be accessed in two ways: as code units or as
31
* code points.
32
* Unicode code points are 21-bit integers and are the scalar values
33
* of Unicode characters. ICU uses the type UChar32 for them.
34
* Unicode code units are the storage units of a given
35
* Unicode/UCS Transformation Format (a character encoding scheme).
36
* With UTF-16, all code points can be represented with either one
37
* or two code units ("surrogates").
38
* String storage is typically based on code units, while properties
39
* of characters are typically determined using code point values.
40
* Some processes may be designed to work with sequences of code units,
41
* or it may be known that all characters that are important to an
42
* algorithm can be represented with single code units.
43
* Other processes will need to use the code point access functions.</p>
44
*
45
* <p>ForwardCharacterIterator provides nextPostInc() to access
46
* a code unit and advance an internal position into the text object,
47
* similar to a <code>return text[position++]</code>.<br>
48
* It provides next32PostInc() to access a code point and advance an internal
49
* position.</p>
50
*
51
* <p>next32PostInc() assumes that the current position is that of
52
* the beginning of a code point, i.e., of its first code unit.
53
* After next32PostInc(), this will be true again.
54
* In general, access to code units and code points in the same
55
* iteration loop should not be mixed. In UTF-16, if the current position
56
* is on a second code unit (Low Surrogate), then only that code unit
57
* is returned even by next32PostInc().</p>
58
*
59
* <p>For iteration with either function, there are two ways to
60
* check for the end of the iteration. When there are no more
61
* characters in the text object:
62
* <ul>
63
* <li>The hasNext() function returns FALSE.</li>
64
* <li>nextPostInc() and next32PostInc() return DONE
65
* when one attempts to read beyond the end of the text object.</li>
66
* </ul>
67
*
68
* Example:
69
* \code
70
* void function1(ForwardCharacterIterator &it) {
71
* UChar32 c;
72
* while(it.hasNext()) {
73
* c=it.next32PostInc();
74
* // use c
75
* }
76
* }
77
*
78
* void function2(ForwardCharacterIterator &it) {
79
* UChar c;
80
* while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
81
* // use c
82
* }
83
* }
84
* \endcode
85
* </p>
86
*
87
* @stable ICU 2.0
88
*/
89
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Value returned by most of ForwardCharacterIterator's functions
     * when the iterator has reached the limits of its iteration.
     * Because DONE is 0xffff, a text that really contains the code unit
     * 0xffff cannot be told apart from the end of the iteration by the
     * return value alone; use hasNext() for an unambiguous check.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();

    /**
     * Returns true when both iterators refer to the same
     * character in the same character-storage object.
     * @param that The ForwardCharacterIterator to be compared for equality
     * @return true when both iterators refer to the same
     * character in the same character-storage object
     * @stable ICU 2.0
     */
    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;

    /**
     * Returns true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object.
     * This is the negation of operator==().
     * @param that The ForwardCharacterIterator to be compared for inequality
     * @return true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object
     * @stable ICU 2.0
     */
    inline UBool operator!=(const ForwardCharacterIterator& that) const;

    /**
     * Generates a hash code for this iterator.
     * @return the hash code.
     * @stable ICU 2.0
     */
    virtual int32_t hashCode(void) const = 0;

    /**
     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
     * RTTI").<P> Despite the fact that this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
     * @return a UClassID for this ForwardCharacterIterator
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID(void) const = 0;

    /**
     * Gets the current code unit for returning and advances to the next code unit
     * in the iteration range
     * (toward endIndex()). If there are
     * no more code units to return, returns DONE.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual UChar nextPostInc(void) = 0;

    /**
     * Gets the current code point for returning and advances to the next code point
     * in the iteration range
     * (toward endIndex()). If there are
     * no more code points to return, returns DONE.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32PostInc(void) = 0;

    /**
     * Returns FALSE if there are no more code units or code points
     * at or after the current position in the iteration range.
     * This is used with nextPostInc() or next32PostInc() in forward
     * iteration.
     * @return FALSE if there are no more code units or code points
     * at or after the current position in the iteration range.
     * @stable ICU 2.0
     */
    virtual UBool hasNext() = 0;

protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
    ForwardCharacterIterator();

    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
    ForwardCharacterIterator(const ForwardCharacterIterator &other);

    /**
     * Assignment operator to be overridden in the implementing class.
     * This base-class version copies nothing and simply returns *this.
     * @return a reference to this object
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
};
186
187
/**
188
* Abstract class that defines an API for iteration
189
* on text objects.
190
* This is an interface for forward and backward iteration
191
* and random access into a text object.
192
*
193
* <p>The API provides backward compatibility to the Java and older ICU
194
* CharacterIterator classes but extends them significantly:
195
* <ol>
196
* <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
197
* <li>While the old API functions provided forward iteration with
198
* "pre-increment" semantics, the new one also provides functions
199
* with "post-increment" semantics. They are more efficient and should
200
* be the preferred iterator functions for new implementations.
201
* The backward iteration always had "pre-decrement" semantics, which
202
* are efficient.</li>
203
* <li>Just like ForwardCharacterIterator, it provides access to
204
* both code units and code points. Code point access versions are available
205
* for the old and the new iteration semantics.</li>
206
* <li>There are new functions for setting and moving the current position
207
* without returning a character, for efficiency.</li>
208
* </ol>
209
*
210
* See ForwardCharacterIterator for examples for using the new forward iteration
211
* functions. For backward iteration, there is also a hasPrevious() function
212
* that can be used analogously to hasNext().
213
* The old functions work as before and are shown below.</p>
214
*
215
* <p>Examples for some of the new functions:</p>
216
*
217
* Forward iteration with hasNext():
218
* \code
219
* void forward1(CharacterIterator &it) {
220
* UChar32 c;
221
* for(it.setToStart(); it.hasNext();) {
222
* c=it.next32PostInc();
223
* // use c
224
* }
225
* }
226
* \endcode
227
* Forward iteration more similar to loops with the old forward iteration,
228
* showing a way to convert simple for() loops:
229
* \code
230
* void forward2(CharacterIterator &it) {
231
* UChar c;
232
* for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
233
* // use c
234
* }
235
* }
236
* \endcode
237
* Backward iteration with setToEnd() and hasPrevious():
238
* \code
239
* void backward1(CharacterIterator &it) {
240
* UChar32 c;
241
* for(it.setToEnd(); it.hasPrevious();) {
242
* c=it.previous32();
243
* // use c
244
* }
245
* }
246
* \endcode
247
* Backward iteration with a more traditional for() loop:
248
* \code
249
* void backward2(CharacterIterator &it) {
250
* UChar c;
251
* for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
252
* // use c
253
* }
254
* }
255
* \endcode
256
*
257
* Example for random access:
258
* \code
259
* void random(CharacterIterator &it) {
260
* // set to the third code point from the beginning
261
* it.move32(3, CharacterIterator::kStart);
262
* // get a code point from here without moving the position
263
* UChar32 c=it.current32();
264
* // get the position
265
* int32_t pos=it.getIndex();
266
* // get the previous code unit
267
* UChar u=it.previous();
268
* // move back one more code unit
269
* it.move(-1, CharacterIterator::kCurrent);
270
* // set the position back to where it was
271
* // and read the same code point c and move beyond it
272
* it.setIndex(pos);
273
* if(c!=it.next32PostInc()) {
274
* exit(1); // CharacterIterator inconsistent
275
* }
276
* }
277
* \endcode
278
*
279
* <p>Examples, especially for the old API:</p>
280
*
281
* Function processing characters, in this example simple output
282
* <pre>
283
* \code
284
* void processChar( UChar c )
285
* {
286
* cout << " " << c;
287
* }
288
* \endcode
289
* </pre>
290
* Traverse the text from start to finish
291
* <pre>
292
* \code
293
* void traverseForward(CharacterIterator& iter)
294
* {
295
* for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
296
* processChar(c);
297
* }
298
* }
299
* \endcode
300
* </pre>
301
* Traverse the text backwards, from end to start
302
* <pre>
303
* \code
304
* void traverseBackward(CharacterIterator& iter)
305
* {
306
* for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
307
* processChar(c);
308
* }
309
* }
310
* \endcode
311
* </pre>
312
* Traverse both forward and backward from a given position in the text.
313
* The calls to Unicode::isLetter() and Unicode::isDigit() in this example represent some additional stopping criteria.
314
* <pre>
315
* \code
316
* void traverseOut(CharacterIterator& iter, int32_t pos)
317
* {
318
* UChar c;
319
* for (c = iter.setIndex(pos);
320
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
321
* c = iter.next()) {}
322
* int32_t end = iter.getIndex();
323
* for (c = iter.setIndex(pos);
324
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
325
* c = iter.previous()) {}
326
* int32_t start = iter.getIndex() + 1;
327
*
328
* cout << "start: " << start << " end: " << end << endl;
329
* for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
330
* processChar(c);
331
* }
332
* }
333
* \endcode
334
* </pre>
335
* Creating a StringCharacterIterator and calling the test functions
336
* <pre>
337
* \code
338
* void CharacterIterator_Example( void )
339
* {
340
* cout << endl << "===== CharacterIterator_Example: =====" << endl;
341
* UnicodeString text("Ein kleiner Satz.");
342
* StringCharacterIterator iterator(text);
343
* cout << "----- traverseForward: -----------" << endl;
344
* traverseForward( iterator );
345
* cout << endl << endl << "----- traverseBackward: ----------" << endl;
346
* traverseBackward( iterator );
347
* cout << endl << endl << "----- traverseOut: ---------------" << endl;
348
* traverseOut( iterator, 7 );
349
* cout << endl << endl << "-----" << endl;
350
* }
351
* \endcode
352
* </pre>
353
*
354
* @stable ICU 2.0
355
*/
356
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Origin enumeration for the move() and move32() functions.
     * @stable ICU 2.0
     */
    enum EOrigin { kStart, kCurrent, kEnd };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~CharacterIterator();

    /**
     * Returns a pointer to a new CharacterIterator of the same
     * concrete class as this one, and referring to the same
     * character in the same text-storage object as this one.  The
     * caller is responsible for deleting the new clone.
     * @return a pointer to a new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator* clone(void) const = 0;

    /**
     * Sets the iterator to refer to the first code unit in its
     * iteration range, and returns that code unit.
     * This can be used to begin an iteration with next().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar first(void) = 0;

    /**
     * Sets the iterator to refer to the first code unit in its
     * iteration range, returns that code unit, and moves the position
     * to the second code unit. This is an alternative to setToStart()
     * for forward iteration with nextPostInc().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar firstPostInc(void);

    /**
     * Sets the iterator to refer to the first code point in its
     * iteration range, and returns that code point.
     * This can be used to begin an iteration with next32().
     * Note that an iteration with next32PostInc(), beginning with,
     * e.g., setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32(void) = 0;

    /**
     * Sets the iterator to refer to the first code point in its
     * iteration range, returns that code point, and moves the position
     * to the second code point. This is an alternative to setToStart()
     * for forward iteration with next32PostInc().
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32PostInc(void);

    /**
     * Sets the iterator to refer to the first code unit or code point in its
     * iteration range. This can be used to begin a forward
     * iteration with nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToStart();

    /**
     * Sets the iterator to refer to the last code unit in its
     * iteration range, and returns that code unit.
     * This can be used to begin an iteration with previous().
     * @return the last code unit.
     * @stable ICU 2.0
     */
    virtual UChar last(void) = 0;

    /**
     * Sets the iterator to refer to the last code point in its
     * iteration range, and returns that code point.
     * This can be used to begin an iteration with previous32().
     * @return the last code point.
     * @stable ICU 2.0
     */
    virtual UChar32 last32(void) = 0;

    /**
     * Sets the iterator to the end of its iteration range, just behind
     * the last code unit or code point. This can be used to begin a backward
     * iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToEnd();

    /**
     * Sets the iterator to refer to the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code unit.
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual UChar setIndex(int32_t position) = 0;

    /**
     * Sets the iterator to refer to the beginning of the code point
     * that contains the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the code point that contains the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual UChar32 setIndex32(int32_t position) = 0;

    /**
     * Returns the code unit the iterator currently refers to.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual UChar current(void) const = 0;

    /**
     * Returns the code point the iterator currently refers to.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 current32(void) const = 0;

    /**
     * Advances to the next code unit in the iteration range
     * (toward endIndex()), and returns that code unit.  If there are
     * no more code units to return, returns DONE.
     * @return the next code unit.
     * @stable ICU 2.0
     */
    virtual UChar next(void) = 0;

    /**
     * Advances to the next code point in the iteration range
     * (toward endIndex()), and returns that code point.  If there are
     * no more code points to return, returns DONE.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than iteration with "post-increment" semantics
     * that is provided by next32PostInc().
     * @return the next code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32(void) = 0;

    /**
     * Advances to the previous code unit in the iteration range
     * (toward startIndex()), and returns that code unit.  If there are
     * no more code units to return, returns DONE.
     * @return the previous code unit.
     * @stable ICU 2.0
     */
    virtual UChar previous(void) = 0;

    /**
     * Advances to the previous code point in the iteration range
     * (toward startIndex()), and returns that code point.  If there are
     * no more code points to return, returns DONE.
     * @return the previous code point.
     * @stable ICU 2.0
     */
    virtual UChar32 previous32(void) = 0;

    /**
     * Returns FALSE if there are no more code units or code points
     * before the current position in the iteration range.
     * This is used with previous() or previous32() in backward
     * iteration.
     * @return FALSE if there are no more code units or code points
     * before the current position in the iteration range, return TRUE otherwise.
     * @stable ICU 2.0
     */
    virtual UBool hasPrevious() = 0;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character returned by first().  Since it's
     * possible to create an iterator that iterates across only
     * part of a text-storage object, this number isn't
     * necessarily 0.
     * @return the numeric index in the underlying text-storage
     * object of the character returned by first().
     * @stable ICU 2.0
     */
    inline int32_t startIndex(void) const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @return the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @stable ICU 2.0
     */
    inline int32_t endIndex(void) const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character the iterator currently refers to
     * (i.e., the character returned by current()).
     * @return the numeric index in the text-storage object of
     * the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t getIndex(void) const;

    /**
     * Returns the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t getLength() const;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code units forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move(int32_t delta, EOrigin origin) = 0;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code points forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".
     * @param result Receives a copy of the text under iteration.
     * @stable ICU 2.0
     */
    virtual void getText(UnicodeString& result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor, just setting the length field in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor, just setting the length and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor, just setting the length, start, end, and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);

    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator.  Sets this CharacterIterator to have the same behavior
     * as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * Needed for correct getText() and hashCode().
     * Returned by getLength().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * Returned by getIndex().
     * @stable ICU 2.0
     */
    int32_t pos;

    /**
     * Base class field for the start of the iteration range.
     * Returned by startIndex().
     * @stable ICU 2.0
     */
    int32_t begin;

    /**
     * Base class field for the end of the iteration range.
     * Returned by endIndex().
     * @stable ICU 2.0
     */
    int32_t end;
};
685
686
/**
 * Inequality comparison, defined as the logical negation of the
 * virtual operator==() of this iterator.
 * @param that The ForwardCharacterIterator to be compared for inequality
 * @return the negated result of operator==(that)
 */
inline UBool
ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
    return !operator==(that);
}
690
691
inline int32_t
692
CharacterIterator::setToStart() {
693
return move(0, kStart);
694
}
695
696
inline int32_t
697
CharacterIterator::setToEnd() {
698
return move(0, kEnd);
699
}
700
701
inline int32_t
702
CharacterIterator::startIndex(void) const {
703
return begin;
704
}
705
706
inline int32_t
707
CharacterIterator::endIndex(void) const {
708
return end;
709
}
710
711
inline int32_t
712
CharacterIterator::getIndex(void) const {
713
return pos;
714
}
715
716
inline int32_t
717
CharacterIterator::getLength(void) const {
718
return textLength;
719
}
720
721
U_NAMESPACE_END
722
#endif
723
724