Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/edits.h
38827 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// edits.h
5
// created: 2016dec30 Markus W. Scherer
6
7
#ifndef __EDITS_H__
8
#define __EDITS_H__
9
10
#include "unicode/utypes.h"
11
12
#if U_SHOW_CPLUSPLUS_API
13
14
#include "unicode/uobject.h"
15
16
/**
17
* \file
18
* \brief C++ API: C++ class Edits for low-level string transformations on styled text.
19
*/
20
21
U_NAMESPACE_BEGIN
22
23
class UnicodeString;
24
25
/**
26
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
27
* in linear progression. Does not support moving/reordering of text.
28
*
29
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
30
* instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
31
* {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
32
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
33
* mapping between code points in the source and destination strings.
34
*
35
* After all edits have been added, instances of this class should be considered immutable, and an
36
* {@link Edits::Iterator} can be used for queries.
37
*
38
* There are four flavors of Edits::Iterator:
39
*
40
* <ul>
41
* <li>{@link #getFineIterator()} retains full granularity of change edits.
42
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
43
* next() on the iterator, skips over no-change edits (unchanged regions).
44
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
45
* edits are automatically merged during the construction phase.)
46
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
47
* calling next() on the iterator, skips over no-change edits (unchanged regions).
48
* </ul>
49
*
50
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
51
* following fine edits:
52
* <ul>
53
* <li>abc ⇨ abc (no-change)
54
* <li>ß ⇨ ss (change)
55
* <li>D ⇨ d (change)
56
* <li>e ⇨ e (no-change)
57
* <li>F ⇨ f (change)
58
* </ul>
59
* and the following coarse edits (note how adjacent change edits get merged together):
60
* <ul>
61
* <li>abc ⇨ abc (no-change)
62
* <li>ßD ⇨ ssd (change)
63
* <li>e ⇨ e (no-change)
64
* <li>F ⇨ f (change)
65
* </ul>
66
*
67
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
68
* `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
69
* their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
70
* methods are used to walk through the string.
71
*
72
* For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
73
* UCharacterCaseTest.java.
74
*
75
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
76
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
77
*
78
* @stable ICU 59
79
*/
80
class U_COMMON_API Edits U_FINAL : public UMemory {
81
public:
82
/**
83
* Constructs an empty object.
84
* @stable ICU 59
85
*/
86
Edits() :
87
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
88
errorCode_(U_ZERO_ERROR) {}
89
/**
90
* Copy constructor.
91
* @param other source edits
92
* @stable ICU 60
93
*/
94
Edits(const Edits &other) :
95
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
96
delta(other.delta), numChanges(other.numChanges),
97
errorCode_(other.errorCode_) {
98
copyArray(other);
99
}
100
/**
101
* Move constructor, might leave src empty.
102
* This object will have the same contents that the source object had.
103
* @param src source edits
104
* @stable ICU 60
105
*/
106
Edits(Edits &&src) U_NOEXCEPT :
107
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
108
delta(src.delta), numChanges(src.numChanges),
109
errorCode_(src.errorCode_) {
110
moveArray(src);
111
}
112
113
/**
114
* Destructor.
115
* @stable ICU 59
116
*/
117
~Edits();
118
119
/**
120
* Assignment operator.
121
* @param other source edits
122
* @return *this
123
* @stable ICU 60
124
*/
125
Edits &operator=(const Edits &other);
126
127
/**
128
* Move assignment operator, might leave src empty.
129
* This object will have the same contents that the source object had.
130
* The behavior is undefined if *this and src are the same object.
131
* @param src source edits
132
* @return *this
133
* @stable ICU 60
134
*/
135
Edits &operator=(Edits &&src) U_NOEXCEPT;
136
137
/**
138
* Resets the data but may not release memory.
139
* @stable ICU 59
140
*/
141
void reset() U_NOEXCEPT;
142
143
/**
144
* Adds a no-change edit: a record for an unchanged segment of text.
145
* Normally called from inside ICU string transformation functions, not user code.
146
* @stable ICU 59
147
*/
148
void addUnchanged(int32_t unchangedLength);
149
/**
150
* Adds a change edit: a record for a text replacement/insertion/deletion.
151
* Normally called from inside ICU string transformation functions, not user code.
152
* @stable ICU 59
153
*/
154
void addReplace(int32_t oldLength, int32_t newLength);
155
/**
156
* Sets the UErrorCode if an error occurred while recording edits.
157
* Preserves older error codes in the outErrorCode.
158
* Normally called from inside ICU string transformation functions, not user code.
159
* @param outErrorCode Set to an error code if it does not contain one already
160
* and an error occurred while recording edits.
161
* Otherwise unchanged.
162
* @return TRUE if U_FAILURE(outErrorCode)
163
* @stable ICU 59
164
*/
165
UBool copyErrorTo(UErrorCode &outErrorCode) const;
166
167
/**
168
* How much longer is the new text compared with the old text?
169
* @return new length minus old length
170
* @stable ICU 59
171
*/
172
int32_t lengthDelta() const { return delta; }
173
/**
174
* @return TRUE if there are any change edits
175
* @stable ICU 59
176
*/
177
UBool hasChanges() const { return numChanges != 0; }
178
179
/**
180
* @return the number of change edits
181
* @stable ICU 60
182
*/
183
int32_t numberOfChanges() const { return numChanges; }
184
185
/**
186
* Access to the list of edits.
187
*
188
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
189
* of the source string and the corresponding span of the destination string. The source string span
190
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
191
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
192
*
193
* The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
194
* and `findDestinationIndex(int32_t, UErrorCode &)` methods.
195
* Calling any of these methods mutates the iterator to make it point to the corresponding edit.
196
*
197
* For more information, see the documentation for {@link Edits}.
198
*
199
* @see getCoarseIterator
200
* @see getFineIterator
201
* @stable ICU 59
202
*/
203
struct U_COMMON_API Iterator U_FINAL : public UMemory {
204
/**
205
* Default constructor, empty iterator.
206
* @stable ICU 60
207
*/
208
Iterator() :
209
array(nullptr), index(0), length(0),
210
remaining(0), onlyChanges_(FALSE), coarse(FALSE),
211
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
212
srcIndex(0), replIndex(0), destIndex(0) {}
213
/**
214
* Copy constructor.
215
* @stable ICU 59
216
*/
217
Iterator(const Iterator &other) = default;
218
/**
219
* Assignment operator.
220
* @stable ICU 59
221
*/
222
Iterator &operator=(const Iterator &other) = default;
223
224
/**
225
* Advances the iterator to the next edit.
226
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
227
* or else the function returns immediately. Check for U_FAILURE()
228
* on output or use with function chaining. (See User Guide for details.)
229
* @return TRUE if there is another edit
230
* @stable ICU 59
231
*/
232
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
233
234
/**
235
* Moves the iterator to the edit that contains the source index.
236
* The source index may be found in a no-change edit
237
* even if normal iteration would skip no-change edits.
238
* Normal iteration can continue from a found edit.
239
*
240
* The iterator state before this search logically does not matter.
241
* (It may affect the performance of the search.)
242
*
243
* The iterator state after this search is undefined
244
* if the source index is out of bounds for the source string.
245
*
246
* @param i source index
247
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
248
* or else the function returns immediately. Check for U_FAILURE()
249
* on output or use with function chaining. (See User Guide for details.)
250
* @return TRUE if the edit for the source index was found
251
* @stable ICU 59
252
*/
253
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
254
return findIndex(i, TRUE, errorCode) == 0;
255
}
256
257
/**
258
* Moves the iterator to the edit that contains the destination index.
259
* The destination index may be found in a no-change edit
260
* even if normal iteration would skip no-change edits.
261
* Normal iteration can continue from a found edit.
262
*
263
* The iterator state before this search logically does not matter.
264
* (It may affect the performance of the search.)
265
*
266
* The iterator state after this search is undefined
267
* if the source index is out of bounds for the source string.
268
*
269
* @param i destination index
270
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
271
* or else the function returns immediately. Check for U_FAILURE()
272
* on output or use with function chaining. (See User Guide for details.)
273
* @return TRUE if the edit for the destination index was found
274
* @stable ICU 60
275
*/
276
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
277
return findIndex(i, FALSE, errorCode) == 0;
278
}
279
280
/**
281
* Computes the destination index corresponding to the given source index.
282
* If the source index is inside a change edit (not at its start),
283
* then the destination index at the end of that edit is returned,
284
* since there is no information about index mapping inside a change edit.
285
*
286
* (This means that indexes to the start and middle of an edit,
287
* for example around a grapheme cluster, are mapped to indexes
288
* encompassing the entire edit.
289
* The alternative, mapping an interior index to the start,
290
* would map such an interval to an empty one.)
291
*
292
* This operation will usually but not always modify this object.
293
* The iterator state after this search is undefined.
294
*
295
* @param i source index
296
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
297
* or else the function returns immediately. Check for U_FAILURE()
298
* on output or use with function chaining. (See User Guide for details.)
299
* @return destination index; undefined if i is not 0..string length
300
* @stable ICU 60
301
*/
302
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
303
304
/**
305
* Computes the source index corresponding to the given destination index.
306
* If the destination index is inside a change edit (not at its start),
307
* then the source index at the end of that edit is returned,
308
* since there is no information about index mapping inside a change edit.
309
*
310
* (This means that indexes to the start and middle of an edit,
311
* for example around a grapheme cluster, are mapped to indexes
312
* encompassing the entire edit.
313
* The alternative, mapping an interior index to the start,
314
* would map such an interval to an empty one.)
315
*
316
* This operation will usually but not always modify this object.
317
* The iterator state after this search is undefined.
318
*
319
* @param i destination index
320
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
321
* or else the function returns immediately. Check for U_FAILURE()
322
* on output or use with function chaining. (See User Guide for details.)
323
* @return source index; undefined if i is not 0..string length
324
* @stable ICU 60
325
*/
326
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
327
328
/**
329
* Returns whether the edit currently represented by the iterator is a change edit.
330
*
331
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
332
* FALSE if oldLength units remain unchanged.
333
* @stable ICU 59
334
*/
335
UBool hasChange() const { return changed; }
336
337
/**
338
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
339
*
340
* @return the number of units in the original string which are replaced or remain unchanged.
341
* @stable ICU 59
342
*/
343
int32_t oldLength() const { return oldLength_; }
344
345
/**
346
* The length of the current span in the destination string, which starts at
347
* {@link #destinationIndex}, or in the replacement string, which starts at
348
* {@link #replacementIndex}.
349
*
350
* @return the number of units in the modified string, if hasChange() is TRUE.
351
* Same as oldLength if hasChange() is FALSE.
352
* @stable ICU 59
353
*/
354
int32_t newLength() const { return newLength_; }
355
356
/**
357
* The start index of the current span in the source string; the span has length
358
* {@link #oldLength}.
359
*
360
* @return the current index into the source string
361
* @stable ICU 59
362
*/
363
int32_t sourceIndex() const { return srcIndex; }
364
365
/**
366
* The start index of the current span in the replacement string; the span has length
367
* {@link #newLength}. Well-defined only if the current edit is a change edit.
368
*
369
* The *replacement string* is the concatenation of all substrings of the destination
370
* string corresponding to change edits.
371
*
372
* This method is intended to be used together with operations that write only replacement
373
* characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
374
* The source string can then be modified in-place.
375
*
376
* @return the current index into the replacement-characters-only string,
377
* not counting unchanged spans
378
* @stable ICU 59
379
*/
380
int32_t replacementIndex() const {
381
// TODO: Throw an exception if we aren't in a change edit?
382
return replIndex;
383
}
384
385
/**
386
* The start index of the current span in the destination string; the span has length
387
* {@link #newLength}.
388
*
389
* @return the current index into the full destination string
390
* @stable ICU 59
391
*/
392
int32_t destinationIndex() const { return destIndex; }
393
394
#ifndef U_HIDE_INTERNAL_API
395
/**
396
* A string representation of the current edit represented by the iterator for debugging. You
397
* should not depend on the contents of the return string.
398
* @internal
399
*/
400
UnicodeString& toString(UnicodeString& appendTo) const;
401
#endif // U_HIDE_INTERNAL_API
402
403
private:
404
friend class Edits;
405
406
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
407
408
int32_t readLength(int32_t head);
409
void updateNextIndexes();
410
void updatePreviousIndexes();
411
UBool noNext();
412
UBool next(UBool onlyChanges, UErrorCode &errorCode);
413
UBool previous(UErrorCode &errorCode);
414
/** @return -1: error or i<0; 0: found; 1: i>=string length */
415
int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
416
417
const uint16_t *array;
418
int32_t index, length;
419
// 0 if we are not within compressed equal-length changes.
420
// Otherwise the number of remaining changes, including the current one.
421
int32_t remaining;
422
UBool onlyChanges_, coarse;
423
424
int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
425
UBool changed;
426
int32_t oldLength_, newLength_;
427
int32_t srcIndex, replIndex, destIndex;
428
};
429
430
/**
431
* Returns an Iterator for coarse-grained change edits
432
* (adjacent change edits are treated as one).
433
* Can be used to perform simple string updates.
434
* Skips no-change edits.
435
* @return an Iterator that merges adjacent changes.
436
* @stable ICU 59
437
*/
438
Iterator getCoarseChangesIterator() const {
439
return Iterator(array, length, TRUE, TRUE);
440
}
441
442
/**
443
* Returns an Iterator for coarse-grained change and no-change edits
444
* (adjacent change edits are treated as one).
445
* Can be used to perform simple string updates.
446
* Adjacent change edits are treated as one edit.
447
* @return an Iterator that merges adjacent changes.
448
* @stable ICU 59
449
*/
450
Iterator getCoarseIterator() const {
451
return Iterator(array, length, FALSE, TRUE);
452
}
453
454
/**
455
* Returns an Iterator for fine-grained change edits
456
* (full granularity of change edits is retained).
457
* Can be used for modifying styled text.
458
* Skips no-change edits.
459
* @return an Iterator that separates adjacent changes.
460
* @stable ICU 59
461
*/
462
Iterator getFineChangesIterator() const {
463
return Iterator(array, length, TRUE, FALSE);
464
}
465
466
/**
467
* Returns an Iterator for fine-grained change and no-change edits
468
* (full granularity of change edits is retained).
469
* Can be used for modifying styled text.
470
* @return an Iterator that separates adjacent changes.
471
* @stable ICU 59
472
*/
473
Iterator getFineIterator() const {
474
return Iterator(array, length, FALSE, FALSE);
475
}
476
477
/**
478
* Merges the two input Edits and appends the result to this object.
479
*
480
* Consider two string transformations (for example, normalization and case mapping)
481
* where each records Edits in addition to writing an output string.<br>
482
* Edits ab reflect how substrings of input string a
483
* map to substrings of intermediate string b.<br>
484
* Edits bc reflect how substrings of intermediate string b
485
* map to substrings of output string c.<br>
486
* This function merges ab and bc such that the additional edits
487
* recorded in this object reflect how substrings of input string a
488
* map to substrings of output string c.
489
*
490
* If unrelated Edits are passed in where the output string of the first
491
* has a different length than the input string of the second,
492
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
493
*
494
* @param ab reflects how substrings of input string a
495
* map to substrings of intermediate string b.
496
* @param bc reflects how substrings of intermediate string b
497
* map to substrings of output string c.
498
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
499
* or else the function returns immediately. Check for U_FAILURE()
500
* on output or use with function chaining. (See User Guide for details.)
501
* @return *this, with the merged edits appended
502
* @stable ICU 60
503
*/
504
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
505
506
private:
507
void releaseArray() U_NOEXCEPT;
508
Edits &copyArray(const Edits &other);
509
Edits &moveArray(Edits &src) U_NOEXCEPT;
510
511
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
512
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
513
514
void append(int32_t r);
515
UBool growArray();
516
517
static const int32_t STACK_CAPACITY = 100;
518
uint16_t *array;
519
int32_t capacity;
520
int32_t length;
521
int32_t delta;
522
int32_t numChanges;
523
UErrorCode errorCode_;
524
uint16_t stackArray[STACK_CAPACITY];
525
};
526
527
U_NAMESPACE_END
528
529
#endif /* U_SHOW_CPLUSPLUS_API */
530
531
#endif // __EDITS_H__
532
533