Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-aarch32-jdk8u
Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/edits.h
48725 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// edits.h
5
// created: 2016dec30 Markus W. Scherer
6
7
#ifndef __EDITS_H__
8
#define __EDITS_H__
9
10
#include "unicode/utypes.h"
11
#include "unicode/uobject.h"
12
13
/**
14
* \file
15
* \brief C++ API: C++ class Edits for low-level string transformations on styled text.
16
*/
17
18
U_NAMESPACE_BEGIN
19
20
class UnicodeString;
21
22
/**
23
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
24
* in linear progression. Does not support moving/reordering of text.
25
*
26
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
27
* instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
28
* {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
29
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
30
* mapping between code points in the source and destination strings.
31
*
32
* After all edits have been added, instances of this class should be considered immutable, and an
33
* {@link Edits::Iterator} can be used for queries.
34
*
35
* There are four flavors of Edits::Iterator:
36
*
37
* <ul>
38
* <li>{@link #getFineIterator()} retains full granularity of change edits.
39
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
40
* next() on the iterator, skips over no-change edits (unchanged regions).
41
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
42
* edits are automatically merged during the construction phase.)
43
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
44
* calling next() on the iterator, skips over no-change edits (unchanged regions).
45
* </ul>
46
*
47
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
48
* following fine edits:
49
* <ul>
50
* <li>abc ⇨ abc (no-change)
51
* <li>ß ⇨ ss (change)
52
* <li>D ⇨ d (change)
53
* <li>e ⇨ e (no-change)
54
* <li>F ⇨ f (change)
55
* </ul>
56
* and the following coarse edits (note how adjacent change edits get merged together):
57
* <ul>
58
* <li>abc ⇨ abc (no-change)
59
* <li>ßD ⇨ ssd (change)
60
* <li>e ⇨ e (no-change)
61
* <li>F ⇨ f (change)
62
* </ul>
63
*
64
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
65
* `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
66
* their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
67
* methods are used to walk through the string.
68
*
69
* For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
70
* UCharacterCaseTest.java.
71
*
72
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
73
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
74
*
75
* @stable ICU 59
76
*/
77
class U_COMMON_API Edits U_FINAL : public UMemory {
78
public:
79
/**
80
* Constructs an empty object.
81
* @stable ICU 59
82
*/
83
Edits() :
84
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
85
errorCode_(U_ZERO_ERROR) {}
86
/**
87
* Copy constructor.
88
* @param other source edits
89
* @stable ICU 60
90
*/
91
Edits(const Edits &other) :
92
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
93
delta(other.delta), numChanges(other.numChanges),
94
errorCode_(other.errorCode_) {
95
copyArray(other);
96
}
97
/**
98
* Move constructor, might leave src empty.
99
* This object will have the same contents that the source object had.
100
* @param src source edits
101
* @stable ICU 60
102
*/
103
Edits(Edits &&src) U_NOEXCEPT :
104
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
105
delta(src.delta), numChanges(src.numChanges),
106
errorCode_(src.errorCode_) {
107
moveArray(src);
108
}
109
110
/**
111
* Destructor.
112
* @stable ICU 59
113
*/
114
~Edits();
115
116
/**
117
* Assignment operator.
118
* @param other source edits
119
* @return *this
120
* @stable ICU 60
121
*/
122
Edits &operator=(const Edits &other);
123
124
/**
125
* Move assignment operator, might leave src empty.
126
* This object will have the same contents that the source object had.
127
* The behavior is undefined if *this and src are the same object.
128
* @param src source edits
129
* @return *this
130
* @stable ICU 60
131
*/
132
Edits &operator=(Edits &&src) U_NOEXCEPT;
133
134
/**
135
* Resets the data but may not release memory.
136
* @stable ICU 59
137
*/
138
void reset() U_NOEXCEPT;
139
140
/**
141
* Adds a no-change edit: a record for an unchanged segment of text.
142
* Normally called from inside ICU string transformation functions, not user code.
143
* @stable ICU 59
144
*/
145
void addUnchanged(int32_t unchangedLength);
146
/**
147
* Adds a change edit: a record for a text replacement/insertion/deletion.
148
* Normally called from inside ICU string transformation functions, not user code.
149
* @stable ICU 59
150
*/
151
void addReplace(int32_t oldLength, int32_t newLength);
152
/**
153
* Sets the UErrorCode if an error occurred while recording edits.
154
* Preserves older error codes in the outErrorCode.
155
* Normally called from inside ICU string transformation functions, not user code.
156
* @param outErrorCode Set to an error code if it does not contain one already
157
* and an error occurred while recording edits.
158
* Otherwise unchanged.
159
* @return TRUE if U_FAILURE(outErrorCode)
160
* @stable ICU 59
161
*/
162
UBool copyErrorTo(UErrorCode &outErrorCode);
163
164
/**
165
* How much longer is the new text compared with the old text?
166
* @return new length minus old length
167
* @stable ICU 59
168
*/
169
int32_t lengthDelta() const { return delta; }
170
/**
171
* @return TRUE if there are any change edits
172
* @stable ICU 59
173
*/
174
UBool hasChanges() const { return numChanges != 0; }
175
176
/**
177
* @return the number of change edits
178
* @stable ICU 60
179
*/
180
int32_t numberOfChanges() const { return numChanges; }
181
182
/**
183
* Access to the list of edits.
184
*
185
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
186
* of the source string and the corresponding span of the destination string. The source string span
187
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
188
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
189
*
190
* The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
191
* and `findDestinationIndex(int32_t, UErrorCode &)` methods.
192
* Calling any of these methods mutates the iterator to make it point to the corresponding edit.
193
*
194
* For more information, see the documentation for {@link Edits}.
195
*
196
* @see getCoarseIterator
197
* @see getFineIterator
198
* @stable ICU 59
199
*/
200
struct U_COMMON_API Iterator U_FINAL : public UMemory {
201
/**
202
* Default constructor, empty iterator.
203
* @stable ICU 60
204
*/
205
Iterator() :
206
array(nullptr), index(0), length(0),
207
remaining(0), onlyChanges_(FALSE), coarse(FALSE),
208
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
209
srcIndex(0), replIndex(0), destIndex(0) {}
210
/**
211
* Copy constructor.
212
* @stable ICU 59
213
*/
214
Iterator(const Iterator &other) = default;
215
/**
216
* Assignment operator.
217
* @stable ICU 59
218
*/
219
Iterator &operator=(const Iterator &other) = default;
220
221
/**
222
* Advances the iterator to the next edit.
223
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
224
* or else the function returns immediately. Check for U_FAILURE()
225
* on output or use with function chaining. (See User Guide for details.)
226
* @return TRUE if there is another edit
227
* @stable ICU 59
228
*/
229
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
230
231
/**
232
* Moves the iterator to the edit that contains the source index.
233
* The source index may be found in a no-change edit
234
* even if normal iteration would skip no-change edits.
235
* Normal iteration can continue from a found edit.
236
*
237
* The iterator state before this search logically does not matter.
238
* (It may affect the performance of the search.)
239
*
240
* The iterator state after this search is undefined
241
* if the source index is out of bounds for the source string.
242
*
243
* @param i source index
244
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
245
* or else the function returns immediately. Check for U_FAILURE()
246
* on output or use with function chaining. (See User Guide for details.)
247
* @return TRUE if the edit for the source index was found
248
* @stable ICU 59
249
*/
250
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
251
return findIndex(i, TRUE, errorCode) == 0;
252
}
253
254
/**
255
* Moves the iterator to the edit that contains the destination index.
256
* The destination index may be found in a no-change edit
257
* even if normal iteration would skip no-change edits.
258
* Normal iteration can continue from a found edit.
259
*
260
* The iterator state before this search logically does not matter.
261
* (It may affect the performance of the search.)
262
*
263
* The iterator state after this search is undefined
264
* if the source index is out of bounds for the source string.
265
*
266
* @param i destination index
267
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
268
* or else the function returns immediately. Check for U_FAILURE()
269
* on output or use with function chaining. (See User Guide for details.)
270
* @return TRUE if the edit for the destination index was found
271
* @stable ICU 60
272
*/
273
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
274
return findIndex(i, FALSE, errorCode) == 0;
275
}
276
277
/**
278
* Computes the destination index corresponding to the given source index.
279
* If the source index is inside a change edit (not at its start),
280
* then the destination index at the end of that edit is returned,
281
* since there is no information about index mapping inside a change edit.
282
*
283
* (This means that indexes to the start and middle of an edit,
284
* for example around a grapheme cluster, are mapped to indexes
285
* encompassing the entire edit.
286
* The alternative, mapping an interior index to the start,
287
* would map such an interval to an empty one.)
288
*
289
* This operation will usually but not always modify this object.
290
* The iterator state after this search is undefined.
291
*
292
* @param i source index
293
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
294
* or else the function returns immediately. Check for U_FAILURE()
295
* on output or use with function chaining. (See User Guide for details.)
296
* @return destination index; undefined if i is not 0..string length
297
* @stable ICU 60
298
*/
299
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
300
301
/**
302
* Computes the source index corresponding to the given destination index.
303
* If the destination index is inside a change edit (not at its start),
304
* then the source index at the end of that edit is returned,
305
* since there is no information about index mapping inside a change edit.
306
*
307
* (This means that indexes to the start and middle of an edit,
308
* for example around a grapheme cluster, are mapped to indexes
309
* encompassing the entire edit.
310
* The alternative, mapping an interior index to the start,
311
* would map such an interval to an empty one.)
312
*
313
* This operation will usually but not always modify this object.
314
* The iterator state after this search is undefined.
315
*
316
* @param i destination index
317
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
318
* or else the function returns immediately. Check for U_FAILURE()
319
* on output or use with function chaining. (See User Guide for details.)
320
* @return source index; undefined if i is not 0..string length
321
* @stable ICU 60
322
*/
323
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
324
325
/**
326
* Returns whether the edit currently represented by the iterator is a change edit.
327
*
328
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
329
* FALSE if oldLength units remain unchanged.
330
* @stable ICU 59
331
*/
332
UBool hasChange() const { return changed; }
333
334
/**
335
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
336
*
337
* @return the number of units in the original string which are replaced or remain unchanged.
338
* @stable ICU 59
339
*/
340
int32_t oldLength() const { return oldLength_; }
341
342
/**
343
* The length of the current span in the destination string, which starts at
344
* {@link #destinationIndex}, or in the replacement string, which starts at
345
* {@link #replacementIndex}.
346
*
347
* @return the number of units in the modified string, if hasChange() is TRUE.
348
* Same as oldLength if hasChange() is FALSE.
349
* @stable ICU 59
350
*/
351
int32_t newLength() const { return newLength_; }
352
353
/**
354
* The start index of the current span in the source string; the span has length
355
* {@link #oldLength}.
356
*
357
* @return the current index into the source string
358
* @stable ICU 59
359
*/
360
int32_t sourceIndex() const { return srcIndex; }
361
362
/**
363
* The start index of the current span in the replacement string; the span has length
364
* {@link #newLength}. Well-defined only if the current edit is a change edit.
365
*
366
* The *replacement string* is the concatenation of all substrings of the destination
367
* string corresponding to change edits.
368
*
369
* This method is intended to be used together with operations that write only replacement
370
* characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
371
* The source string can then be modified in-place.
372
*
373
* @return the current index into the replacement-characters-only string,
374
* not counting unchanged spans
375
* @stable ICU 59
376
*/
377
int32_t replacementIndex() const {
378
// TODO: Throw an exception if we aren't in a change edit?
379
return replIndex;
380
}
381
382
/**
383
* The start index of the current span in the destination string; the span has length
384
* {@link #newLength}.
385
*
386
* @return the current index into the full destination string
387
* @stable ICU 59
388
*/
389
int32_t destinationIndex() const { return destIndex; }
390
391
#ifndef U_HIDE_INTERNAL_API
392
/**
393
* A string representation of the current edit represented by the iterator for debugging. You
394
* should not depend on the contents of the return string.
395
* @internal
396
*/
397
UnicodeString& toString(UnicodeString& appendTo) const;
398
#endif // U_HIDE_INTERNAL_API
399
400
private:
401
friend class Edits;
402
403
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
404
405
int32_t readLength(int32_t head);
406
void updateNextIndexes();
407
void updatePreviousIndexes();
408
UBool noNext();
409
UBool next(UBool onlyChanges, UErrorCode &errorCode);
410
UBool previous(UErrorCode &errorCode);
411
/** @return -1: error or i<0; 0: found; 1: i>=string length */
412
int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
413
414
const uint16_t *array;
415
int32_t index, length;
416
// 0 if we are not within compressed equal-length changes.
417
// Otherwise the number of remaining changes, including the current one.
418
int32_t remaining;
419
UBool onlyChanges_, coarse;
420
421
int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
422
UBool changed;
423
int32_t oldLength_, newLength_;
424
int32_t srcIndex, replIndex, destIndex;
425
};
426
427
/**
428
* Returns an Iterator for coarse-grained change edits
429
* (adjacent change edits are treated as one).
430
* Can be used to perform simple string updates.
431
* Skips no-change edits.
432
* @return an Iterator that merges adjacent changes.
433
* @stable ICU 59
434
*/
435
Iterator getCoarseChangesIterator() const {
436
return Iterator(array, length, TRUE, TRUE);
437
}
438
439
/**
440
* Returns an Iterator for coarse-grained change and no-change edits
441
* (adjacent change edits are treated as one).
442
* Can be used to perform simple string updates.
443
* Adjacent change edits are treated as one edit.
444
* @return an Iterator that merges adjacent changes.
445
* @stable ICU 59
446
*/
447
Iterator getCoarseIterator() const {
448
return Iterator(array, length, FALSE, TRUE);
449
}
450
451
/**
452
* Returns an Iterator for fine-grained change edits
453
* (full granularity of change edits is retained).
454
* Can be used for modifying styled text.
455
* Skips no-change edits.
456
* @return an Iterator that separates adjacent changes.
457
* @stable ICU 59
458
*/
459
Iterator getFineChangesIterator() const {
460
return Iterator(array, length, TRUE, FALSE);
461
}
462
463
/**
464
* Returns an Iterator for fine-grained change and no-change edits
465
* (full granularity of change edits is retained).
466
* Can be used for modifying styled text.
467
* @return an Iterator that separates adjacent changes.
468
* @stable ICU 59
469
*/
470
Iterator getFineIterator() const {
471
return Iterator(array, length, FALSE, FALSE);
472
}
473
474
/**
475
* Merges the two input Edits and appends the result to this object.
476
*
477
* Consider two string transformations (for example, normalization and case mapping)
478
* where each records Edits in addition to writing an output string.<br>
479
* Edits ab reflect how substrings of input string a
480
* map to substrings of intermediate string b.<br>
481
* Edits bc reflect how substrings of intermediate string b
482
* map to substrings of output string c.<br>
483
* This function merges ab and bc such that the additional edits
484
* recorded in this object reflect how substrings of input string a
485
* map to substrings of output string c.
486
*
487
* If unrelated Edits are passed in where the output string of the first
488
* has a different length than the input string of the second,
489
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
490
*
491
* @param ab reflects how substrings of input string a
492
* map to substrings of intermediate string b.
493
* @param bc reflects how substrings of intermediate string b
494
* map to substrings of output string c.
495
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
496
* or else the function returns immediately. Check for U_FAILURE()
497
* on output or use with function chaining. (See User Guide for details.)
498
* @return *this, with the merged edits appended
499
* @stable ICU 60
500
*/
501
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
502
503
private:
504
void releaseArray() U_NOEXCEPT;
505
Edits &copyArray(const Edits &other);
506
Edits &moveArray(Edits &src) U_NOEXCEPT;
507
508
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
509
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
510
511
void append(int32_t r);
512
UBool growArray();
513
514
static const int32_t STACK_CAPACITY = 100;
515
uint16_t *array;
516
int32_t capacity;
517
int32_t length;
518
int32_t delta;
519
int32_t numChanges;
520
UErrorCode errorCode_;
521
uint16_t stackArray[STACK_CAPACITY];
522
};
523
524
U_NAMESPACE_END
525
526
#endif // __EDITS_H__
527
528