Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-aarch32-jdk8u
Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/chariter.h
48729 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
********************************************************************
5
*
6
* Copyright (C) 1997-2011, International Business Machines
7
* Corporation and others. All Rights Reserved.
8
*
9
********************************************************************
10
*/
11
12
#ifndef CHARITER_H
13
#define CHARITER_H
14
15
#include "unicode/utypes.h"
16
#include "unicode/uobject.h"
17
#include "unicode/unistr.h"
18
/**
19
* \file
20
* \brief C++ API: Character Iterator
21
*/
22
23
U_NAMESPACE_BEGIN
24
/**
25
* Abstract class that defines an API for forward-only iteration
26
* on text objects.
27
* This is a minimal interface for iteration without random access
28
* or backwards iteration. It is especially useful for wrapping
29
* streams with converters into an object for collation or
30
* normalization.
31
*
32
* <p>Characters can be accessed in two ways: as code units or as
33
* code points.
34
* Unicode code points are 21-bit integers and are the scalar values
35
* of Unicode characters. ICU uses the type UChar32 for them.
36
* Unicode code units are the storage units of a given
37
* Unicode/UCS Transformation Format (a character encoding scheme).
38
* With UTF-16, all code points can be represented with either one
39
* or two code units ("surrogates").
40
* String storage is typically based on code units, while properties
41
* of characters are typically determined using code point values.
42
* Some processes may be designed to work with sequences of code units,
43
* or it may be known that all characters that are important to an
44
* algorithm can be represented with single code units.
45
* Other processes will need to use the code point access functions.</p>
46
*
47
* <p>ForwardCharacterIterator provides nextPostInc() to access
48
* a code unit and advance an internal position into the text object,
49
* similar to a <code>return text[position++]</code>.<br>
50
* It provides next32PostInc() to access a code point and advance an internal
51
* position.</p>
52
*
53
* <p>next32PostInc() assumes that the current position is that of
54
* the beginning of a code point, i.e., of its first code unit.
55
* After next32PostInc(), this will be true again.
56
* In general, access to code units and code points in the same
57
* iteration loop should not be mixed. In UTF-16, if the current position
58
* is on a second code unit (Low Surrogate), then only that code unit
59
* is returned even by next32PostInc().</p>
60
*
61
* <p>For iteration with either function, there are two ways to
62
* check for the end of the iteration. When there are no more
63
* characters in the text object:
64
* <ul>
65
* <li>The hasNext() function returns FALSE.</li>
66
* <li>nextPostInc() and next32PostInc() return DONE
67
* when one attempts to read beyond the end of the text object.</li>
68
* </ul>
69
*
70
* Example:
71
* \code
72
* void function1(ForwardCharacterIterator &it) {
73
* UChar32 c;
74
* while(it.hasNext()) {
75
* c=it.next32PostInc();
76
* // use c
77
* }
78
* }
79
*
80
* void function2(ForwardCharacterIterator &it) {
81
* char16_t c;
82
* while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
83
* // use c
84
* }
85
* }
86
* \endcode
87
* </p>
88
*
89
* @stable ICU 2.0
90
*/
91
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Sentinel value that most ForwardCharacterIterator functions return
     * once the iterator has reached the limits of its iteration.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();

    /**
     * Equality test.
     * @param that The ForwardCharacterIterator to be compared for equality
     * @return true when both iterators refer to the same character
     *         in the same character-storage object
     * @stable ICU 2.0
     */
    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;

    /**
     * Inequality test.
     * @param that The ForwardCharacterIterator to be compared for inequality
     * @return true when the iterators refer to different text-storage
     *         objects, or to different characters in the same
     *         text-storage object
     * @stable ICU 2.0
     */
    inline UBool operator!=(const ForwardCharacterIterator& that) const;

    /**
     * Computes a hash code for this iterator.
     * @return the hash code.
     * @stable ICU 2.0
     */
    virtual int32_t hashCode() const = 0;

    /**
     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
     * RTTI").<P> Although this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
     * @return a UClassID for this ForwardCharacterIterator
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID() const = 0;

    /**
     * Code unit access with post-increment semantics: fetches the current
     * code unit and then advances to the next code unit in the iteration
     * range (toward endIndex()). Returns DONE if there are no more
     * code units to return.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t nextPostInc() = 0;

    /**
     * Code point access with post-increment semantics: fetches the current
     * code point and then advances to the next code point in the iteration
     * range (toward endIndex()). Returns DONE if there are no more
     * code points to return.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32PostInc() = 0;

    /**
     * End-of-iteration check for forward iteration with nextPostInc()
     * or next32PostInc().
     * @return FALSE if there are no more code units or code points
     *         at or after the current position in the iteration range.
     * @stable ICU 2.0
     */
    virtual UBool hasNext() = 0;

protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator();

    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator(const ForwardCharacterIterator &other);

    /**
     * Assignment operator to be overridden in the implementing class.
     * This base implementation copies nothing and just returns *this.
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
};
188
189
/**
190
* Abstract class that defines an API for iteration
191
* on text objects.
192
* This is an interface for forward and backward iteration
193
* and random access into a text object.
194
*
195
* <p>The API provides backward compatibility to the Java and older ICU
196
* CharacterIterator classes but extends them significantly:
197
* <ol>
198
* <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
199
* <li>While the old API functions provided forward iteration with
200
* "pre-increment" semantics, the new one also provides functions
201
* with "post-increment" semantics. They are more efficient and should
202
* be the preferred iterator functions for new implementations.
203
* The backward iteration always had "pre-decrement" semantics, which
204
* are efficient.</li>
205
* <li>Just like ForwardCharacterIterator, it provides access to
206
* both code units and code points. Code point access versions are available
207
* for the old and the new iteration semantics.</li>
208
* <li>There are new functions for setting and moving the current position
209
* without returning a character, for efficiency.</li>
210
* </ol>
211
*
212
* See ForwardCharacterIterator for examples for using the new forward iteration
213
* functions. For backward iteration, there is also a hasPrevious() function
214
* that can be used analogously to hasNext().
215
* The old functions work as before and are shown below.</p>
216
*
217
* <p>Examples for some of the new functions:</p>
218
*
219
* Forward iteration with hasNext():
220
* \code
221
* void forward1(CharacterIterator &it) {
222
* UChar32 c;
223
* for(it.setToStart(); it.hasNext();) {
224
* c=it.next32PostInc();
225
* // use c
226
* }
227
* }
228
* \endcode
229
* Forward iteration more similar to loops with the old forward iteration,
230
* showing a way to convert simple for() loops:
231
* \code
232
* void forward2(CharacterIterator &it) {
233
* char16_t c;
234
* for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
235
* // use c
236
* }
237
* }
238
* \endcode
239
* Backward iteration with setToEnd() and hasPrevious():
240
* \code
241
* void backward1(CharacterIterator &it) {
242
* UChar32 c;
243
* for(it.setToEnd(); it.hasPrevious();) {
244
* c=it.previous32();
245
* // use c
246
* }
247
* }
248
* \endcode
249
* Backward iteration with a more traditional for() loop:
250
* \code
251
* void backward2(CharacterIterator &it) {
252
* char16_t c;
253
* for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
254
* // use c
255
* }
256
* }
257
* \endcode
258
*
259
* Example for random access:
260
* \code
261
* void random(CharacterIterator &it) {
262
* // set to the third code point from the beginning
263
* it.move32(3, CharacterIterator::kStart);
264
* // get a code point from here without moving the position
265
* UChar32 c=it.current32();
266
* // get the position
267
* int32_t pos=it.getIndex();
268
* // get the previous code unit
269
* char16_t u=it.previous();
270
* // move back one more code unit
271
* it.move(-1, CharacterIterator::kCurrent);
272
* // set the position back to where it was
273
* // and read the same code point c and move beyond it
274
* it.setIndex(pos);
275
* if(c!=it.next32PostInc()) {
276
* exit(1); // CharacterIterator inconsistent
277
* }
278
* }
279
* \endcode
280
*
281
* <p>Examples, especially for the old API:</p>
282
*
283
* Function processing characters, in this example simple output
284
* <pre>
285
* \code
286
* void processChar( char16_t c )
287
* {
288
* cout << " " << c;
289
* }
290
* \endcode
291
* </pre>
292
* Traverse the text from start to finish
293
* <pre>
294
* \code
295
* void traverseForward(CharacterIterator& iter)
296
* {
297
* for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
298
* processChar(c);
299
* }
300
* }
301
* \endcode
302
* </pre>
303
* Traverse the text backwards, from end to start
304
* <pre>
305
* \code
306
* void traverseBackward(CharacterIterator& iter)
307
* {
308
* for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
309
* processChar(c);
310
* }
311
* }
312
* \endcode
313
* </pre>
314
* Traverse both forward and backward from a given position in the text.
315
* The Unicode::isLetter()/Unicode::isDigit() checks in this example represent some additional stopping criteria.
316
* <pre>
317
* \code
318
* void traverseOut(CharacterIterator& iter, int32_t pos)
319
* {
320
* char16_t c;
321
* for (c = iter.setIndex(pos);
322
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
323
* c = iter.next()) {}
324
* int32_t end = iter.getIndex();
325
* for (c = iter.setIndex(pos);
326
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
327
* c = iter.previous()) {}
328
* int32_t start = iter.getIndex() + 1;
329
*
330
* cout << "start: " << start << " end: " << end << endl;
331
* for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
332
* processChar(c);
333
* }
334
* }
335
* \endcode
336
* </pre>
337
* Creating a StringCharacterIterator and calling the test functions
338
* <pre>
339
* \code
340
* void CharacterIterator_Example( void )
341
* {
342
* cout << endl << "===== CharacterIterator_Example: =====" << endl;
343
* UnicodeString text("Ein kleiner Satz.");
344
* StringCharacterIterator iterator(text);
345
* cout << "----- traverseForward: -----------" << endl;
346
* traverseForward( iterator );
347
* cout << endl << endl << "----- traverseBackward: ----------" << endl;
348
* traverseBackward( iterator );
349
* cout << endl << endl << "----- traverseOut: ---------------" << endl;
350
* traverseOut( iterator, 7 );
351
* cout << endl << endl << "-----" << endl;
352
* }
353
* \endcode
354
* </pre>
355
*
356
* @stable ICU 2.0
357
*/
358
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Origin enumeration for the move() and move32() functions.
     * @stable ICU 2.0
     */
    enum EOrigin { kStart, kCurrent, kEnd };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~CharacterIterator();

    /**
     * Creates a new CharacterIterator of the same concrete class as this
     * one, referring to the same character in the same text-storage
     * object as this one. The caller is responsible for deleting
     * the new clone.
     * @return a pointer to a new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator* clone() const = 0;

    /**
     * Positions the iterator on the first code unit in its
     * iteration range and returns that code unit.
     * This can be used to begin an iteration with next().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t first() = 0;

    /**
     * Positions the iterator on the first code unit in its
     * iteration range, returns that code unit, and moves the position
     * to the second code unit. This is an alternative to setToStart()
     * for forward iteration with nextPostInc().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t firstPostInc();

    /**
     * Positions the iterator on the first code point in its
     * iteration range and returns that code point.
     * This can be used to begin an iteration with next32().
     * Note that an iteration with next32PostInc(), beginning with,
     * e.g., setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32() = 0;

    /**
     * Positions the iterator on the first code point in its
     * iteration range, returns that code point, and moves the position
     * to the second code point. This is an alternative to setToStart()
     * for forward iteration with next32PostInc().
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32PostInc();

    /**
     * Positions the iterator on the first code unit or code point in its
     * iteration range. This can be used to begin a forward
     * iteration with nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToStart();

    /**
     * Positions the iterator on the last code unit in its
     * iteration range and returns that code unit.
     * This can be used to begin an iteration with previous().
     * @return the last code unit.
     * @stable ICU 2.0
     */
    virtual char16_t last() = 0;

    /**
     * Positions the iterator on the last code point in its
     * iteration range and returns that code point.
     * This can be used to begin an iteration with previous32().
     * @return the last code point.
     * @stable ICU 2.0
     */
    virtual UChar32 last32() = 0;

    /**
     * Positions the iterator at the end of its iteration range, just behind
     * the last code unit or code point. This can be used to begin a backward
     * iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToEnd();

    /**
     * Positions the iterator on the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code unit.
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual char16_t setIndex(int32_t position) = 0;

    /**
     * Positions the iterator on the beginning of the code point
     * that contains the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the code point that contains the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual UChar32 setIndex32(int32_t position) = 0;

    /**
     * Reads the code unit the iterator currently refers to.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t current() const = 0;

    /**
     * Reads the code point the iterator currently refers to.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 current32() const = 0;

    /**
     * Moves forward to the next code unit in the iteration range
     * (toward endIndex()) and returns that code unit. Returns DONE
     * if there are no more code units to return.
     * @return the next code unit.
     * @stable ICU 2.0
     */
    virtual char16_t next() = 0;

    /**
     * Moves forward to the next code point in the iteration range
     * (toward endIndex()) and returns that code point. Returns DONE
     * if there are no more code points to return.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than iteration with "post-increment" semantics
     * that is provided by next32PostInc().
     * @return the next code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32() = 0;

    /**
     * Moves backward to the previous code unit in the iteration range
     * (toward startIndex()) and returns that code unit. Returns DONE
     * if there are no more code units to return.
     * @return the previous code unit.
     * @stable ICU 2.0
     */
    virtual char16_t previous() = 0;

    /**
     * Moves backward to the previous code point in the iteration range
     * (toward startIndex()) and returns that code point. Returns DONE
     * if there are no more code points to return.
     * @return the previous code point.
     * @stable ICU 2.0
     */
    virtual UChar32 previous32() = 0;

    /**
     * Start-of-iteration check for backward iteration with previous()
     * or previous32().
     * @return FALSE if there are no more code units or code points
     *         before the current position in the iteration range,
     *         TRUE otherwise.
     * @stable ICU 2.0
     */
    virtual UBool hasPrevious() = 0;

    /**
     * Reports the numeric index in the underlying text-storage
     * object of the character returned by first(). Since it's
     * possible to create an iterator that iterates across only
     * part of a text-storage object, this number isn't
     * necessarily 0.
     * @return the numeric index in the underlying text-storage
     *         object of the character returned by first().
     * @stable ICU 2.0
     */
    inline int32_t startIndex() const;

    /**
     * Reports the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @return the numeric index in the underlying text-storage
     *         object of the position immediately BEYOND the character
     *         returned by last().
     * @stable ICU 2.0
     */
    inline int32_t endIndex() const;

    /**
     * Reports the numeric index in the underlying text-storage
     * object of the character the iterator currently refers to
     * (i.e., the character returned by current()).
     * @return the numeric index in the text-storage object of
     *         the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t getIndex() const;

    /**
     * Reports the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t getLength() const;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code units forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move(int32_t delta, EOrigin origin) = 0;

#ifdef move32
    // One of the system headers right now is sometimes defining a conflicting macro we don't use
#undef move32
#endif
    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code points forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".
     * @param result Receives a copy of the text under iteration.
     * @stable ICU 2.0
     */
    virtual void getText(UnicodeString& result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor that only sets the length field in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor that only sets the length and position fields
     * in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor that only sets the length, start, end, and position
     * fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);

    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator. Sets this CharacterIterator to have the same
     * behavior as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * This is necessary for correct getText() and hashCode().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * @stable ICU 2.0
     */
    int32_t pos;

    /**
     * Base class field for the start of the iteration range.
     * @stable ICU 2.0
     */
    int32_t begin;

    /**
     * Base class field for the end of the iteration range.
     * @stable ICU 2.0
     */
    int32_t end;
};
691
692
/** Inequality is the logical negation of whatever operator==() reports. */
inline UBool ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
    const UBool isEqual = operator==(that);
    return !isEqual;
}
696
697
inline int32_t
698
CharacterIterator::setToStart() {
699
return move(0, kStart);
700
}
701
702
inline int32_t
703
CharacterIterator::setToEnd() {
704
return move(0, kEnd);
705
}
706
707
inline int32_t
708
CharacterIterator::startIndex(void) const {
709
return begin;
710
}
711
712
inline int32_t
713
CharacterIterator::endIndex(void) const {
714
return end;
715
}
716
717
inline int32_t
718
CharacterIterator::getIndex(void) const {
719
return pos;
720
}
721
722
inline int32_t
723
CharacterIterator::getLength(void) const {
724
return textLength;
725
}
726
727
U_NAMESPACE_END
728
#endif
729
730